aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/youtube.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r--youtube_dl/extractor/youtube.py84
1 files changed, 32 insertions, 52 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 7f5aeb25b..bc18276d6 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -256,7 +256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
- '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
@@ -264,9 +264,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
# Dash mp4 audio
- '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
- '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
- '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
+ '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50},
+ '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50},
+ '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50},
# Dash webm
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
@@ -287,7 +287,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
+ '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
+ '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@ -412,7 +414,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'id': 'HtVdAasjOgU',
'ext': 'mp4',
'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
- 'description': 'md5:eca57043abae25130f58f655ad9a7771',
+ 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
'uploader': 'The Witcher',
'uploader_id': 'WitcherGame',
'upload_date': '20140605',
@@ -736,6 +738,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'format_id': format_id,
'url': video_url,
'width': int_or_none(r.attrib.get('width')),
+ 'height': int_or_none(r.attrib.get('height')),
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
'filesize': filesize,
@@ -746,7 +749,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
- f.update(self._formats.get(format_id, {}))
+ f.update(self._formats.get(format_id, {}).items())
formats.append(f)
else:
existing_format.update(f)
@@ -1040,6 +1043,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self.report_warning(
'Skipping DASH manifest: %r' % e, video_id)
else:
+ # Hide the formats we found through non-DASH
+ dash_keys = set(df['format_id'] for df in dash_formats)
+ for f in formats:
+ if f['format_id'] in dash_keys:
+ f['format_id'] = 'nondash-%s' % f['format_id']
+ f['preference'] = f.get('preference', 0) - 10000
formats.extend(dash_formats)
self._sort_formats(formats)
@@ -1199,9 +1208,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
if playlist_id.startswith('RD'):
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)
- if playlist_id.startswith('TL'):
- raise ExtractorError('For downloading YouTube.com top lists, use '
- 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
@@ -1247,49 +1253,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
return self.playlist_result(url_results, playlist_id, playlist_title)
-class YoutubeTopListIE(YoutubePlaylistIE):
- IE_NAME = 'youtube:toplist'
- IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
- ' (Example: "yttoplist:music:Top Tracks")')
- _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
- _TESTS = [{
- 'url': 'yttoplist:music:Trending',
- 'playlist_mincount': 5,
- 'skip': 'Only works for logged-in users',
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- channel = mobj.group('chann')
- title = mobj.group('title')
- query = compat_urllib_parse.urlencode({'title': title})
- channel_page = self._download_webpage(
- 'https://www.youtube.com/%s' % channel, title)
- link = self._html_search_regex(
- r'''(?x)
- <a\s+href="([^"]+)".*?>\s*
- <span\s+class="branded-page-module-title-text">\s*
- <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
- channel_page, 'list')
- url = compat_urlparse.urljoin('https://www.youtube.com/', link)
-
- video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
- ids = []
- # sometimes the webpage doesn't contain the videos
- # retry until we get them
- for i in itertools.count(0):
- msg = 'Downloading Youtube mix'
- if i > 0:
- msg += ', retry #%d' % i
-
- webpage = self._download_webpage(url, title, msg)
- ids = orderedSet(re.findall(video_re, webpage))
- if ids:
- break
- url_results = self._ids_to_results(ids)
- return self.playlist_result(url_results, playlist_title=title)
-
-
class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
@@ -1701,3 +1664,20 @@ class YoutubeTruncatedURLIE(InfoExtractor):
'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
' or simply youtube-dl BaW_jenozKc .',
expected=True)
+
+
+class YoutubeTruncatedIDIE(InfoExtractor):
+ IE_NAME = 'youtube:truncated_id'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://(?:www\.)youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ raise ExtractorError(
+ 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
+ expected=True)