diff options
| -rw-r--r-- | youtube_dl/extractor/common.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 11 | 
2 files changed, 18 insertions, 4 deletions
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 14b9b4fe2..d54866d1f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -181,6 +181,7 @@ class InfoExtractor(object):                      by YoutubeDL if it's missing)      categories:     A list of categories that the video falls in, for example                      ["Sports", "Berlin"] +    tags:           A list of tags assigned to the video, e.g. ["sweden", "pop music"]      is_live:        True, False, or None (=unknown). Whether this video is a                      live stream that goes on instead of a fixed-length video.      start_time:     Time in seconds where the reproduction should start, as @@ -630,6 +631,12 @@ class InfoExtractor(object):              template % (content_re, property_re),          ] +    @staticmethod +    def _meta_regex(prop): +        return r'''(?isx)<meta +                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) +                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop) +      def _og_search_property(self, prop, html, name=None, **kargs):          if name is None:              name = 'OpenGraph %s' % prop @@ -660,9 +667,7 @@ class InfoExtractor(object):          if display_name is None:              display_name = name          return self._html_search_regex( -            r'''(?isx)<meta -                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) -                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name), +            self._meta_regex(name),              html, display_name, fatal=fatal, group='content', **kwargs)      def _dc_search_uploader(self, html): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0e411bfb6..4c449fd74 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -329,6 +329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'upload_date': '20121002',                  'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',                  'categories': ['Science & Technology'], +                'tags': ['youtube-dl'],                  'like_count': int,                  'dislike_count': int,                  'start_time': 1, @@ -343,7 +344,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'ext': 'mp4',                  'upload_date': '20120506',                  'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', -                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f', +                'description': 'md5:782e8651347686cba06e58f71ab51773', +                'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', +                         'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', +                         'iconic ep', 'iconic', 'love', 'it'],                  'uploader': 'Icona Pop',                  'uploader_id': 'IconaPop',              } @@ -1072,6 +1076,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          else:              video_categories = None +        video_tags = [ +            unescapeHTML(m.group('content')) +            for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)] +          # description          video_description = get_element_by_id("eow-description", video_webpage)          if video_description: @@ -1260,6 +1268,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              'thumbnail': video_thumbnail,              'description': video_description,              'categories': video_categories, +            'tags': video_tags,              'subtitles': video_subtitles,              'automatic_captions': automatic_captions,              'duration': video_duration, | 
