aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--youtube_dl/extractor/youtube.py123
1 files changed, 122 insertions, 1 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 1bc2c27ad..438eb5aa7 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1086,7 +1086,95 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'skip_download': True,
'youtube_include_dash_manifest': False,
},
- }
+ },
+ {
+ # artist and track fields should return non-null, per issue #20599
+ 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
+ 'info_dict': {
+ 'id': 'MgNrAu2pzNs',
+ 'ext': 'mp4',
+ 'title': 'Voyeur Girl',
+ 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
+ 'upload_date': '20190312',
+ 'uploader': 'Various Artists - Topic',
+ 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
+ 'artist': 'Stephen',
+ 'track': 'Voyeur Girl',
+ 'album': 'it\'s too much love to know my dear',
+ 'release_date': '20190313',
+ 'release_year': 2019,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Retrieve the 'artist' field from the 'Artist:' line of the video
+ # description when it is present on a YouTube Music video.
+ # Some videos have a release_date but no release_year;
+ # in that case release_year should be extracted from release_date.
+ # https://github.com/ytdl-org/youtube-dl/pull/20742#issuecomment-485740932
+ 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
+ 'info_dict': {
+ 'id': 'k0jLE7tTwjY',
+ 'ext': 'mp4',
+ 'title': 'Latch Feat. Sam Smith',
+ 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
+ 'upload_date': '20150110',
+ 'uploader': 'Various Artists - Topic',
+ 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
+ 'artist': 'Disclosure',
+ 'track': 'Latch Feat. Sam Smith',
+ 'album': 'Latch Featuring Sam Smith',
+ 'release_date': '20121008',
+ 'release_year': 2012,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Handle multiple artists on a YouTube Music video
+ 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
+ 'info_dict': {
+ 'id': '74qn0eJSjpA',
+ 'ext': 'mp4',
+ 'title': 'Eastside',
+ 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
+ 'upload_date': '20180710',
+ 'uploader': 'Benny Blanco - Topic',
+ 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
+ 'artist': 'benny blanco, Halsey, Khalid',
+ 'track': 'Eastside',
+ 'album': 'Eastside',
+ 'release_date': '20180713',
+ 'release_year': 2018,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Handle a YouTube Music video that has a release_year but no release_date
+ 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
+ 'info_dict': {
+ 'id': '-hcAI0g-f5M',
+ 'ext': 'mp4',
+ 'title': 'Put It On Me',
+ 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
+ 'upload_date': '20180426',
+ 'uploader': 'Matt Maeson - Topic',
+ 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
+ 'artist': 'Matt Maeson',
+ 'track': 'Put It On Me',
+ 'album': 'The Hearse',
+ 'release_date': None,
+ 'release_year': 2018,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
]
def __init__(self, *args, **kwargs):
@@ -2073,6 +2161,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
track = extract_meta('Song')
artist = extract_meta('Artist')
+ album = None
+ release_date = None
+ release_year = None
+
+ description_info = video_description.split('\n\n')
+ # If the description has the YouTube Music auto-generated format (at least five blank-line-separated sections, the last being 'Auto-generated by YouTube.'), extract additional info
+ if len(description_info) >= 5 and description_info[-1] == 'Auto-generated by YouTube.':
+ track_artist = description_info[1].split(' · ')
+ if len(track_artist) >= 2:
+ if track is None:
+ track = track_artist[0]
+ if artist is None:
+ artist = re.search(r'Artist: ([^\n]+)', description_info[-2])
+ if artist:
+ artist = artist.group(1)
+ if artist is None:
+ artist = track_artist[1]
+ # handle multiple artists
+ if len(track_artist) > 2:
+ for i in range(2, len(track_artist)):
+ artist += ', %s' % track_artist[i]
+ release_year = re.search(r'℗ ([0-9]+)', video_description)
+ if release_year:
+ release_year = int_or_none(release_year.group(1))
+ album = description_info[2]
+ if description_info[4].startswith('Released on: '):
+ release_date = description_info[4].split(': ')[1].replace('-', '')
+ # fall back to the leading four characters of release_date when the ℗ pattern gave no release_year
+ if release_year is None:
+ release_year = int_or_none(release_date[0:4])
m_episode = re.search(
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
@@ -2226,6 +2344,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'episode_number': episode_number,
'track': track,
'artist': artist,
+ 'album': album,
+ 'release_date': release_date,
+ 'release_year': release_year,
}