diff options
| author | phiresky <phiresky@users.noreply.github.com> | 2015-03-04 22:33:56 +0100 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-03-14 19:46:22 +0100 | 
| commit | e7db87f7000143341505cff812d1fa0371ac901e (patch) | |
| tree | bf7990d4310aa157bade02627527763d654ad0b1 /youtube_dl/postprocessor/metadatafromtitle.py | |
| parent | 082b1155a36dc9b51424151f80860e52ee30b55e (diff) | |
Add metadata from title parser
(Closes #5125)
Diffstat (limited to 'youtube_dl/postprocessor/metadatafromtitle.py')
| -rw-r--r-- | youtube_dl/postprocessor/metadatafromtitle.py | 48 | 
1 files changed, 48 insertions, 0 deletions
# -*- coding: utf-8 -*-

import re

from .common import PostProcessor
from ..utils import PostProcessingError


class MetadataFromTitlePPError(PostProcessingError):
    """Raised when a video title does not match the requested title format."""
    pass


class MetadataFromTitlePP(PostProcessor):
    """
    Post-processor that extracts metadata fields from the video title.

    The title format is a string such as '%(title)s - %(artist)s'. It is
    converted to a regex once, at construction time; run() then matches the
    title from the info dict against it and copies every named group into
    that dict.
    """

    def __init__(self, downloader, titleformat):
        """
        downloader  -- object used by run() for reporting (its to_screen()
                       method is called)
        titleformat -- format string with '%(name)s' placeholders
        """
        # run() reports through self._downloader, which this class never
        # assigns itself; the base-class constructor is relied on to store it.
        # NOTE(review): confirm PostProcessor.__init__ accepts the downloader.
        super(MetadataFromTitlePP, self).__init__(downloader)
        self._titleformat = titleformat
        self._titleregex = self.fmtToRegex(titleformat)

    def fmtToRegex(self, fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex string like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'

        Every '%(name)s' placeholder becomes a named group matching one or
        more characters; all text between placeholders is passed through
        re.escape so that it is matched literally.
        """
        pieces = []
        lastpos = 0
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            pieces.append(re.escape(fmt[lastpos:match.start()]))
            pieces.append(r'(?P<' + match.group(1) + r'>.+)')
            lastpos = match.end()
        # Trailing literal text; an empty tail escapes to an empty string.
        pieces.append(re.escape(fmt[lastpos:]))
        return ''.join(pieces)

    def run(self, info):
        """
        Parse info['title'] with the configured format and store each
        captured field in info under the placeholder's name.

        Returns the tuple (True, info), where info is the same dict, updated
        in place. Raises MetadataFromTitlePPError if the title does not match
        the format.
        """
        title = info['title']
        # re.match anchors at the start of the title only; trailing text
        # after the last literal part is not rejected.
        match = re.match(self._titleregex, title)
        if match is None:
            raise MetadataFromTitlePPError(
                'Could not interpret title of video as "%s"' % self._titleformat)
        for attribute, value in match.groupdict().items():
            info[attribute] = value
            self._downloader.to_screen(
                '[fromtitle] parsed ' + attribute + ': ' + value)

        return True, info
