diff options
Diffstat (limited to 'youtube_dl/extractor/common.py')
| -rw-r--r-- | youtube_dl/extractor/common.py | 48 | 
1 files changed, 43 insertions, 5 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9c20d30b4..eb3435c77 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -71,6 +71,10 @@ class InfoExtractor(object):
                                  ("3D" or "DASH video")
                     * width     Width of the video, if known
                     * height    Height of the video, if known
+                    * abr       Average audio bitrate in KBit/s
+                    * acodec    Name of the audio codec in use
+                    * vbr       Average video bitrate in KBit/s
+                    * vcodec    Name of the video codec in use
     webpage_url:    The url to the video webpage, if given to youtube-dl it
                     should allow to get the same result again. (It will be set
                     by YoutubeDL if it's missing)
@@ -315,13 +319,19 @@ class InfoExtractor(object):
 
     # Helper functions for extracting OpenGraph info
     @staticmethod
-    def _og_regex(prop):
-        return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
+    def _og_regexes(prop):
+        content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
+        property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
+        template = r'<meta[^>]+?%s[^>]+?%s'
+        return [
+            template % (property_re, content_re),
+            template % (content_re, property_re),
+        ]
 
     def _og_search_property(self, prop, html, name=None, **kargs):
         if name is None:
             name = 'OpenGraph %s' % prop
-        escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
+        escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
         if escaped is None:
             return None
         return unescapeHTML(escaped)
@@ -336,10 +346,21 @@ class InfoExtractor(object):
         return self._og_search_property('title', html, **kargs)
 
     def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
-        regexes = [self._og_regex('video')]
-        if secure: regexes.insert(0, self._og_regex('video:secure_url'))
+        regexes = self._og_regexes('video')
+        if secure: regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
 
+    def _html_search_meta(self, name, html, display_name=None):
+        if display_name is None:
+            display_name = name
+        return self._html_search_regex(
+            r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
+                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
+            html, display_name, fatal=False)
+
+    def _dc_search_uploader(self, html):
+        return self._html_search_meta('dc.creator', html, 'uploader')
+
     def _rta_search(self, html):
         # See http://www.rtalabel.org/index.php?content=howtofaq#single
         if re.search(r'(?ix)<meta\s+name="rating"\s+'
@@ -348,6 +369,23 @@ class InfoExtractor(object):
             return 18
         return 0
 
+    def _media_rating_search(self, html):
+        # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
+        rating = self._html_search_meta('rating', html)
+
+        if not rating:
+            return None
+
+        RATING_TABLE = {
+            'safe for kids': 0,
+            'general': 8,
+            '14 years': 14,
+            'mature': 17,
+            'restricted': 19,
+        }
+        return RATING_TABLE.get(rating.lower(), None)
+
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
