diff options
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 30 | 
1 files changed, 29 insertions, 1 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7324d8080..1e3ff7d44 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2162,7 +2162,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          # Youtube Music Auto-generated description          release_date = release_year = None          if video_description: -            mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description) +            mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)              if mobj:                  if not track:                      track = mobj.group('track').strip() @@ -2179,6 +2179,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  if release_year:                      release_year = int(release_year) +        yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage) +        contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or [] +        for content in contents: +            rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or [] +            multiple_songs = False +            for row in rows: +                if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True: +                    multiple_songs = True +                    break +            for row in rows: +                mrr = row.get('metadataRowRenderer') or {} +                mrr_title = try_get( +                    mrr, lambda x: x['title']['simpleText'], compat_str) +                mrr_contents = try_get( +                    mrr, lambda x: x['contents'][0], dict) or {} +                mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str) +                if not (mrr_title and mrr_contents_text): +                    continue +                if mrr_title == 'License': +                    video_license = mrr_contents_text +                elif not multiple_songs: +                    if mrr_title == 'Album': +                        album = mrr_contents_text +                    elif mrr_title == 'Artist': +                        artist = mrr_contents_text +                    elif mrr_title == 'Song': +                        track = mrr_contents_text +          m_episode = re.search(              r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',              video_webpage)  | 
