aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/common.py90
-rw-r--r--youtube_dl/extractor/youtube.py514
2 files changed, 420 insertions, 184 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index cb67b976d..a64fcfccc 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -505,7 +505,7 @@ class InfoExtractor(object):
if not self._x_forwarded_for_ip:
# Geo bypass mechanism is explicitly disabled by user
- if not self._downloader.params.get('geo_bypass', True):
+ if not self.get_param('geo_bypass', True):
return
if not geo_bypass_context:
@@ -527,7 +527,7 @@ class InfoExtractor(object):
# Explicit IP block specified by user, use it right away
# regardless of whether extractor is geo bypassable or not
- ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
+ ip_block = self.get_param('geo_bypass_ip_block', None)
# Otherwise use random IP block from geo bypass context but only
# if extractor is known as geo bypassable
@@ -538,8 +538,8 @@ class InfoExtractor(object):
if ip_block:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
+ if self.get_param('verbose', False):
+ self.to_screen(
'[debug] Using fake IP %s as X-Forwarded-For.'
% self._x_forwarded_for_ip)
return
@@ -548,7 +548,7 @@ class InfoExtractor(object):
# Explicit country code specified by user, use it right away
# regardless of whether extractor is geo bypassable or not
- country = self._downloader.params.get('geo_bypass_country', None)
+ country = self.get_param('geo_bypass_country', None)
# Otherwise use random country code from geo bypass context but
# only if extractor is known as geo bypassable
@@ -559,8 +559,8 @@ class InfoExtractor(object):
if country:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
+ if self.get_param('verbose', False):
+ self.to_screen(
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
% (self._x_forwarded_for_ip, country.upper()))
@@ -586,9 +586,9 @@ class InfoExtractor(object):
raise ExtractorError('An extractor error has occurred.', cause=e)
def __maybe_fake_ip_and_retry(self, countries):
- if (not self._downloader.params.get('geo_bypass_country', None)
+ if (not self.get_param('geo_bypass_country', None)
and self._GEO_BYPASS
- and self._downloader.params.get('geo_bypass', True)
+ and self.get_param('geo_bypass', True)
and not self._x_forwarded_for_ip
and countries):
country_code = random.choice(countries)
@@ -698,7 +698,7 @@ class InfoExtractor(object):
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else:
- self._downloader.report_warning(errmsg)
+ self.report_warning(errmsg)
return False
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
@@ -770,11 +770,11 @@ class InfoExtractor(object):
webpage_bytes = prefix + webpage_bytes
if not encoding:
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
- if self._downloader.params.get('dump_intermediate_pages', False):
+ if self.get_param('dump_intermediate_pages', False):
self.to_screen('Dumping request to ' + urlh.geturl())
dump = base64.b64encode(webpage_bytes).decode('ascii')
- self._downloader.to_screen(dump)
- if self._downloader.params.get('write_pages', False):
+ self.to_screen(dump)
+ if self.get_param('write_pages', False):
basen = '%s_%s' % (video_id, urlh.geturl())
if len(basen) > 240:
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
@@ -976,19 +976,9 @@ class InfoExtractor(object):
"""Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(self.__ie_msg(msg))
- def write_debug(self, msg, only_once=False, _cache=[]):
+ def write_debug(self, msg, only_once=False):
'''Log debug message or Print message to stderr'''
- if not self.get_param('verbose', False):
- return
- message = '[debug] ' + self.__ie_msg(msg)
- logger = self.get_param('logger')
- if logger:
- logger.debug(message)
- else:
- if only_once and hash(message) in _cache:
- return
- self._downloader.to_stderr(message)
- _cache.append(hash(message))
+ self._downloader.write_debug(self.__ie_msg(msg), only_once=only_once)
# name, default=None, *args, **kwargs
def get_param(self, name, *args, **kwargs):
@@ -1084,7 +1074,7 @@ class InfoExtractor(object):
if mobj:
break
- if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
+ if not self.get_param('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name
else:
_name = name
@@ -1102,7 +1092,7 @@ class InfoExtractor(object):
elif fatal:
raise RegexNotFoundError('Unable to extract %s' % _name)
else:
- self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
+ self.report_warning('unable to extract %s' % _name + bug_reports_message())
return None
def _search_json(self, start_pattern, string, name, video_id, **kwargs):
@@ -1172,7 +1162,7 @@ class InfoExtractor(object):
username = None
password = None
- if self._downloader.params.get('usenetrc', False):
+ if self.get_param('usenetrc', False):
try:
netrc_machine = netrc_machine or self._NETRC_MACHINE
info = netrc.netrc().authenticators(netrc_machine)
@@ -1183,7 +1173,7 @@ class InfoExtractor(object):
raise netrc.NetrcParseError(
'No authenticators for %s' % netrc_machine)
except (AttributeError, IOError, netrc.NetrcParseError) as err:
- self._downloader.report_warning(
+ self.report_warning(
'parsing .netrc: %s' % error_to_compat_str(err))
return username, password
@@ -1220,10 +1210,10 @@ class InfoExtractor(object):
"""
if self._downloader is None:
return None
- downloader_params = self._downloader.params
- if downloader_params.get('twofactor') is not None:
- return downloader_params['twofactor']
+ twofactor = self.get_param('twofactor')
+ if twofactor is not None:
+ return twofactor
return compat_getpass('Type %s and press [Return]: ' % note)
@@ -1358,7 +1348,7 @@ class InfoExtractor(object):
elif fatal:
raise RegexNotFoundError('Unable to extract JSON-LD')
else:
- self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
+ self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
return {}
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
@@ -1589,7 +1579,7 @@ class InfoExtractor(object):
if f.get('vcodec') == 'none': # audio only
preference -= 50
- if self._downloader.params.get('prefer_free_formats'):
+ if self.get_param('prefer_free_formats'):
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
else:
ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
@@ -1601,7 +1591,7 @@ class InfoExtractor(object):
else:
if f.get('acodec') == 'none': # video only
preference -= 40
- if self._downloader.params.get('prefer_free_formats'):
+ if self.get_param('prefer_free_formats'):
ORDER = ['flv', 'mp4', 'webm']
else:
ORDER = ['webm', 'flv', 'mp4']
@@ -1667,7 +1657,7 @@ class InfoExtractor(object):
""" Either "http:" or "https:", depending on the user's preferences """
return (
'http:'
- if self._downloader.params.get('prefer_insecure', False)
+ if self.get_param('prefer_insecure', False)
else 'https:')
def _proto_relative_url(self, url, scheme=None):
@@ -3199,7 +3189,7 @@ class InfoExtractor(object):
if fatal:
raise ExtractorError(msg)
else:
- self._downloader.report_warning(msg)
+ self.report_warning(msg)
return res
def _float(self, v, name, fatal=False, **kwargs):
@@ -3209,7 +3199,7 @@ class InfoExtractor(object):
if fatal:
raise ExtractorError(msg)
else:
- self._downloader.report_warning(msg)
+ self.report_warning(msg)
return res
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
@@ -3218,12 +3208,12 @@ class InfoExtractor(object):
0, name, value, port, port is not None, domain, True,
domain.startswith('.'), path, True, secure, expire_time,
discard, None, None, rest)
- self._downloader.cookiejar.set_cookie(cookie)
+ self.cookiejar.set_cookie(cookie)
def _get_cookies(self, url):
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
req = sanitized_Request(url)
- self._downloader.cookiejar.add_cookie_header(req)
+ self.cookiejar.add_cookie_header(req)
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
def _apply_first_set_cookie_header(self, url_handle, cookie):
@@ -3283,8 +3273,8 @@ class InfoExtractor(object):
return not any_restricted
def extract_subtitles(self, *args, **kwargs):
- if (self._downloader.params.get('writesubtitles', False)
- or self._downloader.params.get('listsubtitles')):
+ if (self.get_param('writesubtitles', False)
+ or self.get_param('listsubtitles')):
return self._get_subtitles(*args, **kwargs)
return {}
@@ -3305,7 +3295,11 @@ class InfoExtractor(object):
""" Merge subtitle dictionaries, language by language. """
# ..., * , target=None
- target = kwargs.get('target') or dict(subtitle_dict1)
+ target = kwargs.get('target')
+ if target is None:
+ target = dict(subtitle_dict1)
+ else:
+ subtitle_dicts = (subtitle_dict1,) + subtitle_dicts
for subtitle_dict in subtitle_dicts:
for lang in subtitle_dict:
@@ -3313,8 +3307,8 @@ class InfoExtractor(object):
return target
def extract_automatic_captions(self, *args, **kwargs):
- if (self._downloader.params.get('writeautomaticsub', False)
- or self._downloader.params.get('listsubtitles')):
+ if (self.get_param('writeautomaticsub', False)
+ or self.get_param('listsubtitles')):
return self._get_automatic_captions(*args, **kwargs)
return {}
@@ -3322,9 +3316,9 @@ class InfoExtractor(object):
raise NotImplementedError('This method must be implemented by subclasses')
def mark_watched(self, *args, **kwargs):
- if (self._downloader.params.get('mark_watched', False)
+ if (self.get_param('mark_watched', False)
and (self._get_login_info()[0] is not None
- or self._downloader.params.get('cookiefile') is not None)):
+ or self.get_param('cookiefile') is not None)):
self._mark_watched(*args, **kwargs)
def _mark_watched(self, *args, **kwargs):
@@ -3332,7 +3326,7 @@ class InfoExtractor(object):
def geo_verification_headers(self):
headers = {}
- geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
+ geo_verification_proxy = self.get_param('geo_verification_proxy')
if geo_verification_proxy:
headers['Ytdl-request-proxy'] = geo_verification_proxy
return headers
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index ce97fd75b..0b802351d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1,5 +1,4 @@
# coding: utf-8
-
from __future__ import unicode_literals
import collections
@@ -49,6 +48,7 @@ from ..utils import (
parse_duration,
parse_qs,
qualities,
+ remove_end,
remove_start,
smuggle_url,
str_or_none,
@@ -109,7 +109,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'MWEB',
- 'clientVersion': '2.20250311.03.00',
+ 'clientVersion': '2.2.20250925.01.00',
# mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
},
@@ -123,23 +123,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'client': {
'clientName': 'TVHTML5',
'clientVersion': '7.20250312.16.00',
- 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
+ # See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506
+ 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
'SUPPORTS_COOKIES': True,
},
+
'web': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB',
- 'clientVersion': '2.20250312.04.00',
+ 'clientVersion': '2.20250925.01.00',
+ 'userAgent': 'Mozilla/5.0',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
'REQUIRE_PO_TOKEN': True,
'SUPPORTS_COOKIES': True,
},
+ # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
+ 'web_safari': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB',
+ 'clientVersion': '2.20250925.01.00',
+ 'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
+ },
+ },
+ },
}
def _login(self):
@@ -342,14 +355,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if not self._login():
return
- _DEFAULT_API_DATA = {
- 'context': {
- 'client': {
- 'clientName': 'WEB',
- 'clientVersion': '2.20201021.03.00',
- },
- },
- }
+ _DEFAULT_API_DATA = {'context': _INNERTUBE_CLIENTS['web']['INNERTUBE_CONTEXT']}
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
@@ -425,10 +431,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
T(compat_str)))
def _extract_ytcfg(self, video_id, webpage):
- return self._parse_json(
- self._search_regex(
- r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
- default='{}'), video_id, fatal=False) or {}
+ ytcfg = self._search_json(
+ r'ytcfg\.set\s*\(', webpage, 'ytcfg', video_id,
+ end_pattern=r'\)\s*;', default={})
+
+ traverse_obj(ytcfg, (
+ 'INNERTUBE_CONTEXT', 'client', 'configInfo',
+ T(lambda x: x.pop('appInstallData', None))))
+
+ return ytcfg
def _extract_video(self, renderer):
video_id = renderer['videoId']
@@ -497,11 +508,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
data['params'] = params
for page_num in itertools.count(1):
search = self._download_json(
- 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'https://www.youtube.com/youtubei/v1/search',
video_id='query "%s"' % query,
note='Downloading page %s' % page_num,
errnote='Unable to download API page', fatal=False,
data=json.dumps(data).encode('utf8'),
+ query={
+ # 'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'prettyPrint': 'false',
+ },
headers={'content-type': 'application/json'})
if not search:
break
@@ -696,7 +711,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias(?:_tce)?\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]{6,})\b.*?\.js$',
)
- _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
+ _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
_GEO_BYPASS = False
@@ -1587,6 +1602,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
}
+ _PLAYER_JS_VARIANT_MAP = (
+ ('main', 'player_ias.vflset/en_US/base.js'),
+ ('tcc', 'player_ias_tcc.vflset/en_US/base.js'),
+ ('tce', 'player_ias_tce.vflset/en_US/base.js'),
+ ('es5', 'player_es5.vflset/en_US/base.js'),
+ ('es6', 'player_es6.vflset/en_US/base.js'),
+ ('tv', 'tv-player-ias.vflset/tv-player-ias.js'),
+ ('tv_es6', 'tv-player-es6.vflset/tv-player-es6.js'),
+ ('phone', 'player-plasma-ias-phone-en_US.vflset/base.js'),
+ ('tablet', 'player-plasma-ias-tablet-en_US.vflset/base.js'),
+ )
+
@classmethod
def suitable(cls, url):
if parse_qs(url).get('list', [None])[0]:
@@ -1598,6 +1625,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._code_cache = {}
self._player_cache = {}
+ def _get_player_js_version(self):
+ player_js_version = self.get_param('youtube_player_js_version') or '20348@0004de42'
+ sts_hash = self._search_regex(
+ ('^actual$(^)?(^)?', r'^([0-9]{5,})@([0-9a-f]{8,})$'),
+ player_js_version, 'player_js_version', group=(1, 2), default=None)
+ if sts_hash:
+ return sts_hash
+ self.report_warning(
+ 'Invalid player JS version "{0}" specified. '
+ 'It should be "{1}" or in the format of {2}'.format(
+ player_js_version, 'actual', 'SignatureTimeStamp@Hash'), only_once=True)
+ return None, None
+
# *ytcfgs, webpage=None
def _extract_player_url(self, *ytcfgs, **kw_webpage):
if ytcfgs and not isinstance(ytcfgs[0], dict):
@@ -1608,19 +1648,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
webpage or '', 'player URL', fatal=False)
if player_url:
ytcfgs = ytcfgs + ({'PLAYER_JS_URL': player_url},)
- return traverse_obj(
+ player_url = traverse_obj(
ytcfgs, (Ellipsis, 'PLAYER_JS_URL'), (Ellipsis, 'WEB_PLAYER_CONTEXT_CONFIGS', Ellipsis, 'jsUrl'),
get_all=False, expected_type=lambda u: urljoin('https://www.youtube.com', u))
+ player_id_override = self._get_player_js_version()[1]
+
+ requested_js_variant = self.get_param('youtube_player_js_variant') or 'main'
+ variant_js = next(
+ (v for k, v in self._PLAYER_JS_VARIANT_MAP if k == requested_js_variant),
+ None)
+ if variant_js:
+ player_id = player_id_override or self._extract_player_info(player_url)
+ original_url = player_url
+ player_url = '/s/player/{0}/{1}'.format(player_id, variant_js)
+ if original_url != player_url:
+ self.write_debug(
+ 'Forcing "{0}" player JS variant for player {1}\n'
+ ' original url = {2}'.format(
+ requested_js_variant, player_id, original_url),
+ only_once=True)
+ elif requested_js_variant != 'actual':
+ self.report_warning(
+ 'Invalid player JS variant name "{0}" requested. '
+ 'Valid choices are: {1}'.format(
+ requested_js_variant, ','.join(k for k, _ in self._PLAYER_JS_VARIANT_MAP)),
+ only_once=True)
+
+ return urljoin('https://www.youtube.com', player_url)
+
def _download_player_url(self, video_id, fatal=False):
res = self._download_webpage(
'https://www.youtube.com/iframe_api',
note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
player_version = self._search_regex(
r'player\\?/([0-9a-fA-F]{8})\\?/', res or '', 'player version', fatal=fatal,
- default=NO_DEFAULT if res else None)
- if player_version:
- return 'https://www.youtube.com/s/player/{0}/player_ias.vflset/en_US/base.js'.format(player_version)
+ default=NO_DEFAULT if res else None) or None
+ return player_version and 'https://www.youtube.com/s/player/{0}/player_ias.vflset/en_US/base.js'.format(player_version)
def _signature_cache_id(self, example_sig):
""" Return a string representation of a signature """
@@ -1634,36 +1698,89 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError(
'Cannot identify player %r' % (player_url,), cause=e)
- def _load_player(self, video_id, player_url, fatal=True, player_id=None):
- if not player_id:
+ def _player_js_cache_key(self, player_url, extra_id=None, _cache={}):
+ if player_url not in _cache:
player_id = self._extract_player_info(player_url)
- if player_id not in self._code_cache:
+ player_path = remove_start(
+ compat_urllib_parse.urlparse(player_url).path,
+ '/s/player/{0}/'.format(player_id))
+ variant = next((k for k, v in self._PLAYER_JS_VARIANT_MAP
+ if v == player_path), None)
+ if not variant:
+ variant = next(
+ (k for k, v in self._PLAYER_JS_VARIANT_MAP
+ if re.match(re.escape(v).replace('en_US', r'\w+') + '$', player_path)),
+ None)
+ if not variant:
+ self.write_debug(
+ 'Unable to determine player JS variant\n'
+ ' player = {0}'.format(player_url), only_once=True)
+ variant = re.sub(r'[^a-zA-Z0-9]', '_', remove_end(player_path, '.js'))
+ _cache[player_url] = join_nonempty(player_id, variant)
+
+ if extra_id:
+ extra_id = '-'.join((_cache[player_url], extra_id))
+ assert os.path.basename(extra_id) == extra_id
+ return extra_id
+ return _cache[player_url]
+
+ def _load_player(self, video_id, player_url, fatal=True):
+ player_js_key = self._player_js_cache_key(player_url)
+ if player_js_key not in self._code_cache:
code = self._download_webpage(
player_url, video_id, fatal=fatal,
- note='Downloading player ' + player_id,
- errnote='Download of %s failed' % player_url)
+ note='Downloading player {0}'.format(player_js_key),
+ errnote='Download of {0} failed'.format(player_url))
if code:
- self._code_cache[player_id] = code
- return self._code_cache[player_id] if fatal else self._code_cache.get(player_id)
+ self._code_cache[player_js_key] = code
+ return self._code_cache.get(player_js_key)
+
+ def _load_player_data_from_cache(self, name, player_url, extra_id=None):
+ cache_id = ('youtube-{0}'.format(name), self._player_js_cache_key(player_url, extra_id))
+ data = self._player_cache.get(cache_id)
+ if data:
+ return data
+
+ data = self.cache.load(*cache_id, min_ver='2025.04.07')
+ if data:
+ self._player_cache[cache_id] = data
+ return data
+
+ def _store_player_data_to_cache(self, name, player_url, data, extra_id=None):
+ cache_id = ('youtube-{0}'.format(name), self._player_js_cache_key(player_url, extra_id))
+
+ if cache_id not in self._player_cache:
+ self.cache.store(cache_id[0], cache_id[1], data)
+ self._player_cache[cache_id] = data
+
+ def _remove_player_data_from_cache(self, name, player_url, extra_id=None):
+ cache_id = ('youtube-{0}'.format(name), self._player_js_cache_key(player_url, extra_id))
+
+ if cache_id in self._player_cache:
+ self.cache.clear(*cache_id)
+ self._player_cache.pop(cache_id, None)
def _extract_signature_function(self, video_id, player_url, example_sig):
- player_id = self._extract_player_info(player_url)
+ # player_id = self._extract_player_info(player_url)
# Read from filesystem cache
- func_id = 'js_{0}_{1}'.format(
- player_id, self._signature_cache_id(example_sig))
- assert os.path.basename(func_id) == func_id
-
- self.write_debug('Extracting signature function {0}'.format(func_id))
- cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
+ extra_id = self._signature_cache_id(example_sig)
+ self.write_debug('Extracting signature function {0}-{1}'.format(player_url, extra_id))
+ cache_spec, code = self._load_player_data_from_cache(
+ 'sigfuncs', player_url, extra_id=extra_id), None
if not cache_spec:
- code = self._load_player(video_id, player_url, player_id)
- if code:
- res = self._parse_sig_js(code)
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
- cache_spec = [ord(c) for c in res(test_string)]
- self.cache.store('youtube-sigfuncs', func_id, cache_spec)
+ code = self._load_player(video_id, player_url)
+ if code:
+ res = self._parse_sig_js(code)
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_spec = [ord(c) for c in res(test_string)]
+ self._store_player_data_to_cache(
+ 'sigfuncs', player_url, cache_spec, extra_id=extra_id)
+ else:
+ self.report_warning(
+ 'Failed to compute signature function {0}-{1}'.format(
+ player_url, extra_id))
return lambda s: ''.join(s[i] for i in cache_spec)
@@ -1816,6 +1933,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return ret
def _extract_n_function_name(self, jscode):
+ func_name, idx = None, None
+
+ def generic_n_function_search(func_name=None):
+ return self._search_regex(
+ r'''(?xs)
+ (?:(?<=[^\w$])|^) # instead of \b, which ignores $
+ (?P<name>%s)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
+ \s*\{(?:(?!};).)+?(?:
+ ["']enhanced_except_ |
+ return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+
+ )
+ ''' % (func_name or r'(?!\d)[a-zA-Z\d_$]+',), jscode,
+ 'Initial JS player n function name', group='name',
+ default=None if func_name else NO_DEFAULT)
+
+ # these special cases are redundant and probably obsolete (2025-04):
+ # they make the tests run ~10% faster without fallback warnings
+ r"""
func_name, idx = self._search_regex(
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
@@ -1842,45 +1977,59 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
\(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
default=(None, None))
+ """
+
+ if not func_name:
+ # nfunc=function(x){...}|function nfunc(x); ...
+ # ... var y=[nfunc]|y[idx]=nfunc);
+ # obvious REs hang, so use a two-stage tactic
+ for m in re.finditer(r'''(?x)
+ [\n;]var\s(?:(?:(?!,).)+,|\s)*?(?!\d)[\w$]+(?:\[(?P<idx>\d+)\])?\s*=\s*
+ (?(idx)|\[\s*)(?P<nfunc>(?!\d)[\w$]+)(?(idx)|\s*\])
+ \s*?[;\n]
+ ''', jscode):
+ fn = self._search_regex(
+ r'[;,]\s*(function\s+)?({0})(?(1)|\s*=\s*function)\s*\((?!\d)[\w$]+\)\s*\{1}(?!\s*return\s)'.format(
+ re.escape(m.group('nfunc')), '{'),
+ jscode, 'Initial JS player n function name (2)', group=2, default=None)
+ if fn:
+ func_name = fn
+ idx = m.group('idx')
+ if generic_n_function_search(func_name):
+ # don't look any further
+ break
+
# thx bashonly: yt-dlp/yt-dlp/pull/10611
if not func_name:
- self.report_warning('Falling back to generic n function search')
- return self._search_regex(
- r'''(?xs)
- (?:(?<=[^\w$])|^) # instead of \b, which ignores $
- (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
- \s*\{(?:(?!};).)+?(?:
- ["']enhanced_except_ |
- return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+
- )
- ''', jscode, 'Initial JS player n function name', group='name')
+ self.report_warning('Falling back to generic n function search', only_once=True)
+ return generic_n_function_search()
+
if not idx:
return func_name
return self._search_json(
- r'var\s+{0}\s*='.format(re.escape(func_name)), jscode,
+ r'(?<![\w-])var\s(?:(?:(?!,).)+,|\s)*?{0}\s*='.format(re.escape(func_name)), jscode,
'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
- func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]',
+ func_name, contains_pattern=r'\[.+\]', end_pattern='[,;]',
transform_source=js_to_json)[int(idx)]
def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
- func_code = self.cache.load('youtube-nsig', player_id)
+ func_code = self._load_player_data_from_cache('nsig', player_url)
jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode)
if func_code:
return jsi, player_id, func_code
- return self._extract_n_function_code_jsi(video_id, jsi, player_id)
- def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
+ return self._extract_n_function_code_jsi(video_id, jsi, player_id, player_url)
+ def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None, player_url=None):
func_name = self._extract_n_function_name(jsi.code)
func_code = self._extract_sig_fn(jsi, func_name)
-
- if player_id:
- self.cache.store('youtube-nsig', player_id, func_code)
+ if player_url:
+ self._store_player_data_to_cache('nsig', player_url, func_code)
return jsi, player_id, func_code
def _extract_n_function_from_code(self, jsi, func_code):
@@ -1913,7 +2062,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
n_param = n_param[-1]
n_response = decrypt_nsig(n_param)(n_param, video_id, player_url)
if n_response is None:
- # give up if descrambling failed
+ # give up and forget cached data if descrambling failed
+ self._remove_player_data_from_cache('nsig', player_url)
break
fmt['url'] = update_url_query(fmt['url'], {'n': n_response})
@@ -1921,21 +2071,37 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
"""
Extract signatureTimestamp (sts)
+
Required to tell API what sig/player version is in use.
"""
- sts = traverse_obj(ytcfg, 'STS', expected_type=int)
- if not sts:
- # Attempt to extract from player
- if player_url is None:
- error_msg = 'Cannot extract signature timestamp without player_url.'
- if fatal:
- raise ExtractorError(error_msg)
- self.report_warning(error_msg)
- return
- code = self._load_player(video_id, player_url, fatal=fatal)
- sts = int_or_none(self._search_regex(
- r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
- 'JS player signature timestamp', group='sts', fatal=fatal))
+ sts = traverse_obj(
+ (self._get_player_js_version(), ytcfg),
+ (0, 0),
+ (1, 'STS'),
+ expected_type=int_or_none)
+
+ if sts:
+ return sts
+
+ if not player_url:
+ error_msg = 'Cannot extract signature timestamp without player url'
+ if fatal:
+ raise ExtractorError(error_msg)
+ self.report_warning(error_msg)
+ return None
+
+ sts = self._load_player_data_from_cache('sts', player_url)
+ if sts:
+ return sts
+
+ # Attempt to extract from player
+ code = self._load_player(video_id, player_url, fatal=fatal)
+ sts = int_or_none(self._search_regex(
+ r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
+ 'JS player signature timestamp', group='sts', fatal=fatal))
+ if sts:
+ self._store_player_data_to_cache('sts', player_url, sts)
+
return sts
def _mark_watched(self, video_id, player_response):
@@ -2060,8 +2226,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_id = self._match_id(url)
base_url = self.http_scheme() + '//www.youtube.com/'
webpage_url = base_url + 'watch?v=' + video_id
+ ua = traverse_obj(self._INNERTUBE_CLIENTS, (
+ 'web', 'INNERTUBE_CONTEXT', 'client', 'userAgent'))
+ headers = {'User-Agent': ua} if ua else None
webpage = self._download_webpage(
- webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
+ webpage_url + '&bpctr=9999999999&has_verified=1', video_id,
+ headers=headers, fatal=False)
player_response = None
player_url = None
@@ -2071,12 +2241,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_id, 'initial player response')
is_live = traverse_obj(player_response, ('videoDetails', 'isLive'))
+ fetched_timestamp = None
if False and not player_response:
player_response = self._call_api(
'player', {'videoId': video_id}, video_id)
if True or not player_response:
origin = 'https://www.youtube.com'
pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
+ fetched_timestamp = int(time.time())
player_url = self._extract_player_url(webpage)
ytcfg = self._extract_ytcfg(video_id, webpage or '')
@@ -2143,6 +2315,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
hls = traverse_obj(
(player_response, api_player_response),
(Ellipsis, 'streamingData', 'hlsManifestUrl', T(url_or_none)))
+ fetched_timestamp = int(time.time())
if len(hls) == 2 and not hls[0] and hls[1]:
player_response['streamingData']['hlsManifestUrl'] = hls[1]
else:
@@ -2154,13 +2327,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_response['videoDetails'] = video_details
def is_agegated(playability):
- if not isinstance(playability, dict):
- return
+ # playability: dict
+ if not playability:
+ return False
if playability.get('desktopLegacyAgeGateReason'):
return True
- reasons = filter(None, (playability.get(r) for r in ('status', 'reason')))
+ reasons = traverse_obj(playability, (('status', 'reason'),))
AGE_GATE_REASONS = (
'confirm your age', 'age-restricted', 'inappropriate', # reason
'age_verification_required', 'age_check_required', # status
@@ -2218,15 +2392,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
trailer_video_id, self.ie_key(), trailer_video_id)
def get_text(x):
- if not x:
- return
- text = x.get('simpleText')
- if text and isinstance(text, compat_str):
- return text
- runs = x.get('runs')
- if not isinstance(runs, list):
- return
- return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
+ return ''.join(traverse_obj(
+ x, (('simpleText',),), ('runs', Ellipsis, 'text'),
+ expected_type=compat_str))
search_meta = (
(lambda x: self._html_search_meta(x, webpage, default=None))
@@ -2309,6 +2477,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
lower = lambda s: s.lower()
+ if is_live:
+ fetched_timestamp = None
+ elif fetched_timestamp is not None:
+ # Handle preroll waiting period
+ preroll_sleep = self.get_param('youtube_preroll_sleep')
+ preroll_sleep = int_or_none(preroll_sleep, default=6)
+ fetched_timestamp += preroll_sleep
+
for fmt in streaming_formats:
if fmt.get('targetDurationSec'):
continue
@@ -2405,6 +2581,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'downloader_options': {'http_chunk_size': CHUNK_SIZE}, # No longer useful?
})
+ if fetched_timestamp:
+ dct['available_at'] = fetched_timestamp
+
formats.append(dct)
def process_manifest_format(f, proto, client_name, itag, all_formats=False):
@@ -2422,6 +2601,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if f.get('source_preference') is None:
f['source_preference'] = -1
+ # Deprioritize since its pre-merged m3u8 formats may have lower quality audio streams
+ if client_name == 'web_safari' and proto == 'hls' and not is_live:
+ f['source_preference'] -= 1
+
if itag in ('616', '235'):
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
f['source_preference'] += 100
@@ -2438,15 +2621,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
hls_manifest_url = streaming_data.get('hlsManifestUrl')
if hls_manifest_url:
- for f in self._extract_m3u8_formats(
+ formats.extend(
+ f for f in self._extract_m3u8_formats(
hls_manifest_url, video_id, 'mp4',
- entry_protocol='m3u8_native', live=is_live, fatal=False):
+ entry_protocol='m3u8_native', live=is_live, fatal=False)
if process_manifest_format(
- f, 'hls', None, self._search_regex(
- r'/itag/(\d+)', f['url'], 'itag', default=None)):
- formats.append(f)
+ f, 'hls', None, self._search_regex(
+ r'/itag/(\d+)', f['url'], 'itag', default=None)))
- if self._downloader.params.get('youtube_include_dash_manifest', True):
+ if self.get_param('youtube_include_dash_manifest', True):
dash_manifest_url = streaming_data.get('dashManifestUrl')
if dash_manifest_url:
for f in self._extract_mpd_formats(
@@ -2473,7 +2656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
playability_status,
lambda x: x['errorScreen']['playerErrorMessageRenderer'],
dict) or {}
- reason = get_text(pemr.get('reason')) or playability_status.get('reason')
+ reason = get_text(pemr.get('reason')) or playability_status.get('reason') or ''
subreason = pemr.get('subreason')
if subreason:
subreason = clean_html(get_text(subreason))
@@ -2485,7 +2668,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.raise_geo_restricted(
subreason, countries)
reason += '\n' + subreason
+
if reason:
+ if 'sign in' in reason.lower():
+ self.raise_login_required(remove_end(reason, 'This helps protect our community. Learn more'))
+ elif traverse_obj(playability_status, ('errorScreen', 'playerCaptchaViewModel', T(dict))):
+ reason += '. YouTube is requiring a captcha challenge before playback'
raise ExtractorError(reason, expected=True)
self._sort_formats(formats)
@@ -2588,6 +2776,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for fmt in self._SUBTITLE_FORMATS:
query.update({
'fmt': fmt,
+ # xosf=1 causes undesirable text position data for vtt, json3 & srv* subtitles
+ # See: https://github.com/yt-dlp/yt-dlp/issues/13654
+ 'xosf': []
})
lang_subs.append({
'ext': fmt,
@@ -2629,7 +2820,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
d_k += '_time'
if d_k not in info and k in s_ks:
- info[d_k] = parse_duration(query[k][0])
+ info[d_k] = parse_duration(v[0])
if video_description:
# Youtube Music Auto-generated description
@@ -2658,6 +2849,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
initial_data = self._call_api(
'next', {'videoId': video_id}, video_id, fatal=False)
+ initial_sdcr = None
if initial_data:
chapters = self._extract_chapters_from_json(
initial_data, video_id, duration)
@@ -2677,9 +2869,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for next_num, content in enumerate(contents, start=1):
mmlir = content.get('macroMarkersListItemRenderer') or {}
start_time = chapter_time(mmlir)
- end_time = chapter_time(try_get(
- contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
- if next_num < len(contents) else duration
+ end_time = (traverse_obj(
+ contents, (next_num, 'macroMarkersListItemRenderer', T(chapter_time)))
+ if next_num < len(contents) else duration)
if start_time is None or end_time is None:
continue
chapters.append({
@@ -2785,12 +2977,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['track'] = mrr_contents_text
# this is not extraction but spelunking!
- carousel_lockups = traverse_obj(
- initial_data,
- ('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
- 'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis,
- 'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis),
- expected_type=dict) or []
+ initial_sdcr = traverse_obj(initial_data, (
+ 'engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
+ 'content', 'structuredDescriptionContentRenderer', T(dict)),
+ get_all=False)
+ carousel_lockups = traverse_obj(initial_sdcr, (
+ 'items', Ellipsis, 'videoDescriptionMusicSectionRenderer',
+ 'carouselLockups', Ellipsis, T(dict))) or []
# try to reproduce logic from metadataRowContainerRenderer above (if it still is)
fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license'))
# multiple_songs ?
@@ -2815,6 +3008,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.mark_watched(video_id, player_response)
+ # Fallbacks for missing metadata
+ if initial_sdcr:
+ if info.get('description') is None:
+ info['description'] = traverse_obj(initial_sdcr, (
+ 'items', Ellipsis, 'expandableVideoDescriptionBodyRenderer',
+ 'attributedDescriptionBodyText', 'content', T(compat_str)),
+ get_all=False)
+ # videoDescriptionHeaderRenderer also has publishDate/channel/handle/ucid, but not needed
+ if info.get('title') is None:
+ info['title'] = traverse_obj(
+ (initial_sdcr, initial_data),
+ (0, 'items', Ellipsis, 'videoDescriptionHeaderRenderer', T(dict)),
+ (1, 'playerOverlays', 'playerOverlayRenderer', 'videoDetails',
+ 'playerOverlayVideoDetailsRenderer', T(dict)),
+ expected_type=lambda x: self._get_text(x, 'title'),
+ get_all=False)
+
return merge_dicts(
info, {
'uploader_id': self._extract_uploader_id(owner_profile_url),
@@ -3325,24 +3535,46 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if not content_id:
return
content_type = view_model.get('contentType')
- if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
+ if content_type == 'LOCKUP_CONTENT_TYPE_VIDEO':
+ ie = YoutubeIE
+ url = update_url_query(
+ 'https://www.youtube.com/watch', {'v': content_id}),
+ thumb_keys = (None,)
+ elif content_type in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
+ ie = YoutubeTabIE
+ url = update_url_query(
+ 'https://www.youtube.com/playlist', {'list': content_id}),
+ thumb_keys = ('collectionThumbnailViewModel', 'primaryThumbnail')
+ else:
self.report_warning(
- 'Unsupported lockup view model content type "{0}"{1}'.format(content_type, bug_reports_message()), only_once=True)
+ 'Unsupported lockup view model content type "{0}"{1}'.format(content_type, bug_reports_message()),
+ only_once=True)
return
+ thumb_keys = ('contentImage',) + thumb_keys + ('thumbnailViewModel', 'image')
return merge_dicts(self.url_result(
- update_url_query('https://www.youtube.com/playlist', {'list': content_id}),
- ie=YoutubeTabIE.ie_key(), video_id=content_id), {
+ url, ie=ie.ie_key(), video_id=content_id), {
'title': traverse_obj(view_model, (
- 'metadata', 'lockupMetadataViewModel', 'title', 'content', T(compat_str))),
- 'thumbnails': self._extract_thumbnails(view_model, (
- 'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail',
- 'thumbnailViewModel', 'image'), final_key='sources'),
+ 'metadata', 'lockupMetadataViewModel', 'title',
+ 'content', T(compat_str))),
+ 'thumbnails': self._extract_thumbnails(
+ view_model, thumb_keys, final_key='sources'),
})
+ def _extract_shorts_lockup_view_model(self, view_model):
+ content_id = traverse_obj(view_model, (
+ 'onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId',
+ T(lambda v: v if YoutubeIE.suitable(v) else None)))
+ return merge_dicts(self.url_result(
+ content_id, ie=YoutubeIE.ie_key(), video_id=content_id), {
+ 'title': traverse_obj(view_model, (
+ 'overlayMetadata', 'primaryText', 'content', T(compat_str))),
+ 'thumbnails': self._extract_thumbnails(
+ view_model, 'thumbnail', final_key='sources'),
+ }) if content_id else None
+
def _video_entry(self, video_renderer):
video_id = video_renderer.get('videoId')
- if video_id:
- return self._extract_video(video_renderer)
+ return self._extract_video(video_renderer) if video_id else None
def _post_thread_entries(self, post_thread_renderer):
post_renderer = try_get(
@@ -3385,10 +3617,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
yield entry
def _rich_grid_entries(self, contents):
- for content in contents:
- content = traverse_obj(
- content, ('richItemRenderer', 'content'),
- expected_type=dict) or {}
+ for content in traverse_obj(
+ contents, (Ellipsis, 'richItemRenderer', 'content'),
+ expected_type=dict):
video_renderer = traverse_obj(
content, 'videoRenderer', 'reelItemRenderer',
expected_type=dict)
@@ -3396,6 +3627,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
entry = self._video_entry(video_renderer)
if entry:
yield entry
+ # shorts item
+ shorts_lockup_view_model = content.get('shortsLockupViewModel')
+ if shorts_lockup_view_model:
+ entry = self._extract_shorts_lockup_view_model(shorts_lockup_view_model)
+ if entry:
+ yield entry
# playlist
renderer = traverse_obj(
content, 'playlistRenderer', expected_type=dict) or {}
@@ -3434,23 +3671,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
next_continuation = cls._extract_next_continuation_data(renderer)
if next_continuation:
return next_continuation
- contents = []
- for key in ('contents', 'items'):
- contents.extend(try_get(renderer, lambda x: x[key], list) or [])
- for content in contents:
- if not isinstance(content, dict):
- continue
- continuation_ep = try_get(
- content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
- dict)
- if not continuation_ep:
- continue
- continuation = try_get(
- continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
+ for command in traverse_obj(renderer, (
+ ('contents', 'items', 'rows'), Ellipsis, 'continuationItemRenderer',
+ ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
+ (('commandExecutorCommand', 'commands', Ellipsis), None), T(dict))):
+ continuation = traverse_obj(command, ('continuationCommand', 'token', T(compat_str)))
if not continuation:
continue
- ctp = continuation_ep.get('clickTrackingParams')
- return YoutubeTabIE._build_continuation_query(continuation, ctp)
+ ctp = command.get('clickTrackingParams')
+ return cls._build_continuation_query(continuation, ctp)
def _entries(self, tab, item_id, webpage):
tab_content = try_get(tab, lambda x: x['content'], dict)
@@ -3499,6 +3728,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
entry = self._video_entry(renderer)
if entry:
yield entry
+ renderer = isr_content.get('richGridRenderer')
+ if renderer:
+ for from_ in self._rich_grid_entries(
+ traverse_obj(renderer, ('contents', Ellipsis, T(dict)))):
+ yield from_
+ continuation = self._extract_continuation(renderer)
+ continue
if not continuation:
continuation = self._extract_continuation(is_renderer)
@@ -3508,8 +3744,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
rich_grid_renderer = tab_content.get('richGridRenderer')
if not rich_grid_renderer:
return
- for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
- yield entry
+ for from_ in self._rich_grid_entries(
+ traverse_obj(rich_grid_renderer, ('contents', Ellipsis, T(dict)))):
+ yield from_
continuation = self._extract_continuation(rich_grid_renderer)
@@ -3555,8 +3792,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# Downloading page may result in intermittent 5xx HTTP error
# that is usually worked around with a retry
response = self._download_json(
- 'https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'https://www.youtube.com/youtubei/v1/browse',
None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
+ query={
+ # 'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'prettyPrint': 'false',
+ },
headers=headers, data=json.dumps(data).encode('utf8'))
break
except ExtractorError as e:
@@ -3993,6 +4234,7 @@ class YoutubeFeedsInfoExtractor(YoutubeTabIE):
Subclasses must define the _FEED_NAME property.
"""
+
_LOGIN_REQUIRED = True
@property