aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/youtube.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r--youtube_dl/extractor/youtube.py128
1 files changed, 74 insertions, 54 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 73a01107d..225e2b7f4 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -225,7 +225,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
# Dash webm audio
- '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
+ '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
# RTMP (unnamed)
@@ -344,7 +344,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
"""Indicate the download will use the RTMP protocol."""
self.to_screen(u'RTMP download detected')
- def _extract_signature_function(self, video_id, player_url, slen):
+ def _signature_cache_id(self, example_sig):
+ """ Return a string representation of a signature """
+ return u'.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+
+ def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match(
r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
player_url)
@@ -354,7 +358,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
player_id = id_m.group('id')
# Read from filesystem cache
- func_id = '%s_%s_%d' % (player_type, player_id, slen)
+ func_id = '%s_%s_%s' % (
+ player_type, player_id, self._signature_cache_id(example_sig))
assert os.path.basename(func_id) == func_id
cache_dir = get_cachedir(self._downloader.params)
@@ -369,6 +374,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return lambda s: u''.join(s[i] for i in cache_spec)
except IOError:
pass # No cache available
+ except ValueError:
+ try:
+ file_size = os.path.getsize(cache_fn)
+ except (OSError, IOError) as oe:
+ file_size = str(oe)
+ self._downloader.report_warning(
+ u'Cache %s failed (%s)' % (cache_fn, file_size))
if player_type == 'js':
code = self._download_webpage(
@@ -388,7 +400,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if cache_enabled:
try:
- test_string = u''.join(map(compat_chr, range(slen)))
+ test_string = u''.join(map(compat_chr, range(len(example_sig))))
cache_res = res(test_string)
cache_spec = [ord(c) for c in cache_res]
try:
@@ -404,7 +416,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return res
- def _print_sig_code(self, func, slen):
+ def _print_sig_code(self, func, example_sig):
def gen_sig_code(idxs):
def _genslice(start, end, step):
starts = u'' if start == 0 else str(start)
@@ -433,11 +445,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
yield _genslice(start, i, step)
- test_string = u''.join(map(compat_chr, range(slen)))
+ test_string = u''.join(map(compat_chr, range(len(example_sig))))
cache_res = func(test_string)
cache_spec = [ord(c) for c in cache_res]
expr_code = u' + '.join(gen_sig_code(cache_spec))
- code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
+ signature_id_tuple = '(%s)' % (
+ ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+ code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
+ u' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen(u'Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
@@ -465,20 +480,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if player_url.startswith(u'//'):
player_url = u'https:' + player_url
try:
- player_id = (player_url, len(s))
+ player_id = (player_url, self._signature_cache_id(s))
if player_id not in self._player_cache:
func = self._extract_signature_function(
- video_id, player_url, len(s)
+ video_id, player_url, s
)
self._player_cache[player_id] = func
func = self._player_cache[player_id]
if self._downloader.params.get('youtube_print_sig_code'):
- self._print_sig_code(func, len(s))
+ self._print_sig_code(func, s)
return func(s)
except Exception as e:
tb = traceback.format_exc()
raise ExtractorError(
- u'Automatic signature extraction failed: ' + tb, cause=e)
+ u'Signature extraction failed: ' + tb, cause=e)
def _get_available_subtitles(self, video_id, webpage):
try:
@@ -613,7 +628,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
data = compat_urllib_parse.urlencode({
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
- 'sts':'16268',
+ 'sts': self._search_regex(
+ r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
})
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage(video_info_url, video_id,
@@ -807,50 +823,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
url_map = {}
for url_data_str in encoded_url_map.split(','):
url_data = compat_parse_qs(url_data_str)
- if 'itag' in url_data and 'url' in url_data:
- url = url_data['url'][0]
- if 'sig' in url_data:
- url += '&signature=' + url_data['sig'][0]
- elif 's' in url_data:
- encrypted_sig = url_data['s'][0]
-
- if not age_gate:
- jsplayer_url_json = self._search_regex(
- r'"assets":.+?"js":\s*("[^"]+")',
- video_webpage, u'JS player URL')
- player_url = json.loads(jsplayer_url_json)
+ if 'itag' not in url_data or 'url' not in url_data:
+ continue
+ format_id = url_data['itag'][0]
+ url = url_data['url'][0]
+
+ if 'sig' in url_data:
+ url += '&signature=' + url_data['sig'][0]
+ elif 's' in url_data:
+ encrypted_sig = url_data['s'][0]
+
+ if not age_gate:
+ jsplayer_url_json = self._search_regex(
+ r'"assets":.+?"js":\s*("[^"]+")',
+ video_webpage, u'JS player URL')
+ player_url = json.loads(jsplayer_url_json)
+ if player_url is None:
+ player_url_json = self._search_regex(
+ r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+ video_webpage, u'age gate player URL')
+ player_url = json.loads(player_url_json)
+
+ if self._downloader.params.get('verbose'):
if player_url is None:
- player_url_json = self._search_regex(
- r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
- video_webpage, u'age gate player URL')
- player_url = json.loads(player_url_json)
-
- if self._downloader.params.get('verbose'):
- if player_url is None:
- player_version = 'unknown'
- player_desc = 'unknown'
+ player_version = 'unknown'
+ player_desc = 'unknown'
+ else:
+ if player_url.endswith('swf'):
+ player_version = self._search_regex(
+ r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
+ u'flash player', fatal=False)
+ player_desc = 'flash player %s' % player_version
else:
- if player_url.endswith('swf'):
- player_version = self._search_regex(
- r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
- u'flash player', fatal=False)
- player_desc = 'flash player %s' % player_version
- else:
- player_version = self._search_regex(
- r'html5player-(.+?)\.js', video_webpage,
- 'html5 player', fatal=False)
- player_desc = u'html5 player %s' % player_version
-
- parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
- self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
- (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
-
- signature = self._decrypt_signature(
- encrypted_sig, video_id, player_url, age_gate)
- url += '&signature=' + signature
- if 'ratebypass' not in url:
- url += '&ratebypass=yes'
- url_map[url_data['itag'][0]] = url
+ player_version = self._search_regex(
+ r'html5player-([^/]+?)(?:/html5player)?\.js',
+ player_url,
+ 'html5 player', fatal=False)
+ player_desc = u'html5 player %s' % player_version
+
+ parts_sizes = self._signature_cache_id(encrypted_sig)
+ self.to_screen(u'{%s} signature length %s, %s' %
+ (format_id, parts_sizes, player_desc))
+
+ signature = self._decrypt_signature(
+ encrypted_sig, video_id, player_url, age_gate)
+ url += '&signature=' + signature
+ if 'ratebypass' not in url:
+ url += '&ratebypass=yes'
+ url_map[format_id] = url
formats = _map_to_format_list(url_map)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]