diff options
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 123 | 
1 file changed, 122 insertions, 1 deletion
| diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1bc2c27ad..438eb5aa7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1086,7 +1086,95 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'skip_download': True,                  'youtube_include_dash_manifest': False,              }, -        } +        }, +        { +            # artist and track fields should return non-null, per issue #20599 +            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs', +            'info_dict': { +                'id': 'MgNrAu2pzNs', +                'ext': 'mp4', +                'title': 'Voyeur Girl', +                'description': 'md5:7ae382a65843d6df2685993e90a8628f', +                'upload_date': '20190312', +                'uploader': 'Various Artists - Topic', +                'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw', +                'artist': 'Stephen', +                'track': 'Voyeur Girl', +                'album': 'it\'s too much love to know my dear', +                'release_date': '20190313', +                'release_year': 2019, +            }, +            'params': { +                'skip_download': True, +            }, +        }, +        { +            # Retrieve 'artist' field from 'Artist:' in video description +            # when it is present on youtube music video +            # Some videos have release_date and no release_year - +            # (release_year should be extracted from release_date) +            # https://github.com/ytdl-org/youtube-dl/pull/20742#issuecomment-485740932 +            'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY', +            'info_dict': { +                'id': 'k0jLE7tTwjY', +                'ext': 'mp4', +                'title': 'Latch Feat. 
Sam Smith', +                'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335', +                'upload_date': '20150110', +                'uploader': 'Various Artists - Topic', +                'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w', +                'artist': 'Disclosure', +                'track': 'Latch Feat. Sam Smith', +                'album': 'Latch Featuring Sam Smith', +                'release_date': '20121008', +                'release_year': 2012, +            }, +            'params': { +                'skip_download': True, +            }, +        }, +        { +            # handle multiple artists on youtube music video +            'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA', +            'info_dict': { +                'id': '74qn0eJSjpA', +                'ext': 'mp4', +                'title': 'Eastside', +                'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2', +                'upload_date': '20180710', +                'uploader': 'Benny Blanco - Topic', +                'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A', +                'artist': 'benny blanco, Halsey, Khalid', +                'track': 'Eastside', +                'album': 'Eastside', +                'release_date': '20180713', +                'release_year': 2018, +            }, +            'params': { +                'skip_download': True, +            }, +        }, +        { +            # handle youtube music video with release_year and no release_date +            'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M', +            'info_dict': { +                'id': '-hcAI0g-f5M', +                'ext': 'mp4', +                'title': 'Put It On Me', +                'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e', +                'upload_date': '20180426', +                'uploader': 'Matt Maeson - Topic', +                'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ', +                'artist': 'Matt Maeson', +                
'track': 'Put It On Me', +                'album': 'The Hearse', +                'release_date': None, +                'release_year': 2018, +            }, +            'params': { +                'skip_download': True, +            }, +        },      ]      def __init__(self, *args, **kwargs): @@ -2073,6 +2161,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          track = extract_meta('Song')          artist = extract_meta('Artist') +        album = None +        release_date = None +        release_year = None + +        description_info = video_description.split('\n\n') +        # If the description of the video has the youtube music auto-generated format, extract additional info +        if len(description_info) >= 5 and description_info[-1] == 'Auto-generated by YouTube.': +            track_artist = description_info[1].split(' · ') +            if len(track_artist) >= 2: +                if track is None: +                    track = track_artist[0] +                if artist is None: +                    artist = re.search(r'Artist: ([^\n]+)', description_info[-2]) +                    if artist: +                        artist = artist.group(1) +                    if artist is None: +                        artist = track_artist[1] +                        # handle multiple artists +                        if len(track_artist) > 2: +                            for i in range(2, len(track_artist)): +                                artist += ', %s' % track_artist[i] +            release_year = re.search(r'℗ ([0-9]+)', video_description) +            if release_year: +                release_year = int_or_none(release_year.group(1)) +            album = description_info[2] +            if description_info[4].startswith('Released on: '): +                release_date = description_info[4].split(': ')[1].replace('-', '') +                # extract release_year from release_date if necessary +                if release_year is None: +                    
release_year = int_or_none(release_date[0:4])          m_episode = re.search(              r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>', @@ -2226,6 +2344,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              'episode_number': episode_number,              'track': track,              'artist': artist, +            'album': album, +            'release_date': release_date, +            'release_year': release_year,          } | 
