diff options
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 14 | 
1 files changed, 10 insertions, 4 deletions
| diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c0fafbfd5..4c449fd74 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -329,6 +329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'upload_date': '20121002',                  'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',                  'categories': ['Science & Technology'], +                'tags': ['youtube-dl'],                  'like_count': int,                  'dislike_count': int,                  'start_time': 1, @@ -343,7 +344,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'ext': 'mp4',                  'upload_date': '20120506',                  'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', -                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f', +                'description': 'md5:782e8651347686cba06e58f71ab51773', +                'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', +                         'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', +                         'iconic ep', 'iconic', 'love', 'it'],                  'uploader': 'Icona Pop',                  'uploader_id': 'IconaPop',              } @@ -1072,8 +1076,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          else:              video_categories = None -        video_tags = re.findall(r'''<meta(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+property=['"]?og:video:tag['"]?(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+content=['"]?([^>'"]+?)['"]?\s*>''' -        , video_webpage, re.DOTALL | re.IGNORECASE); +        video_tags = [ +            unescapeHTML(m.group('content')) +            for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)] +          # description          video_description = get_element_by_id("eow-description", video_webpage)          if video_description: @@ -1261,8 +1267,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              'title': video_title,              'thumbnail': video_thumbnail,              'description': video_description, -            'tags' : video_tags,              'categories': video_categories, +            'tags': video_tags,              'subtitles': video_subtitles,              'automatic_captions': automatic_captions,              'duration': video_duration, | 
