Add metadata from title parser

(Closes #5125)
author: phiresky <phiresky@users.noreply.github.com> 2015-03-04 22:33:56 +0100
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-03-14 19:46:22 +0100
commit: e7db87f7000143341505cff812d1fa0371ac901e (patch)
tree: bf7990d4310aa157bade02627527763d654ad0b1 /youtube_dl/postprocessor/metadatafromtitle.py
parent: 082b1155a36dc9b51424151f80860e52ee30b55e (diff)
1 files changed, 48 insertions, 0 deletions
diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py
new file mode 100644
index 000000000..4c9d3aafe
--- /dev/null
+++ b/youtube_dl/postprocessor/metadatafromtitle.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+from .common import PostProcessor
+from ..utils import PostProcessingError
+
+
+class MetadataFromTitlePPError(PostProcessingError):
+    """Raised when a video title does not match the requested title format."""
+
+
+class MetadataFromTitlePP(PostProcessor):
+    """Fill in metadata fields by parsing the video title.
+
+    The title is matched against a format such as '%(artist)s - %(title)s';
+    every '%(name)s' field of the format is stored in the info dict as `name`.
+    """
+
+    def __init__(self, downloader, titleformat):
+        # run() reports through self._downloader, which this class never sets
+        # itself; PostProcessor.__init__ is expected to store the downloader
+        # under that name (NOTE(review): confirm against .common).
+        super(MetadataFromTitlePP, self).__init__(downloader)
+        self._titleformat = titleformat
+        self._titleregex = self.fmtToRegex(titleformat)
+
+    def fmtToRegex(self, fmt):
+        r"""
+        Converts a string like
+           '%(title)s - %(artist)s'
+        to a regex like
+           '(?P<title>.+)\ \-\ (?P<artist>.+)'
+        Text outside the %(..)s fields is escaped and matched literally.
+        """
+        lastpos = 0
+        regex = ""
+        # replace %(..)s with regex group and escape other string parts
+        for match in re.finditer(r'%\((\w+)\)s', fmt):
+            regex += re.escape(fmt[lastpos:match.start()])
+            regex += r'(?P<' + match.group(1) + '>.+)'
+            lastpos = match.end()
+        # escape whatever literal text follows the last field (may be empty)
+        regex += re.escape(fmt[lastpos:])
+        return regex
+
+    def run(self, info):
+        """
+        Parses info['title'] with the title regex and copies every named
+        group into info, announcing each parsed value on the downloader.
+
+        Returns the tuple (True, info); raises MetadataFromTitlePPError if
+        the title does not match the format.
+        """
+        title = info['title']
+        match = re.match(self._titleregex, title)
+        if match is None:
+            raise MetadataFromTitlePPError('Could not interpret title of video as "%s"' % self._titleformat)
+        for attribute, value in match.groupdict().items():
+            info[attribute] = value
+            self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)
+
+        return True, info
author	phiresky <phiresky@users.noreply.github.com>	2015-03-04 22:33:56 +0100
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2015-03-14 19:46:22 +0100
commit	e7db87f7000143341505cff812d1fa0371ac901e (patch)
tree	bf7990d4310aa157bade02627527763d654ad0b1 /youtube_dl/postprocessor/metadatafromtitle.py
parent	082b1155a36dc9b51424151f80860e52ee30b55e (diff)