aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordirkf <fieldhouse@gmx.net>2025-01-15 03:19:15 +0000
committerdirkf <fieldhouse@gmx.net>2025-01-15 03:19:15 +0000
commit21fff051217d1c14a7d50fa752052eadbcafee4e (patch)
treeaa9864877ee53d4ff385baac5f0aef99831ece85
parent1036478d130c5f2001eca2d7d12558abe601d933 (diff)
[YouTube] Switch to TV API client
* thx yt-dlp/yt-dlp#12059
-rw-r--r--youtube_dl/extractor/youtube.py102
1 files changed, 83 insertions, 19 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6171df84a..1424277ac 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -31,7 +31,9 @@ from ..utils import (
dict_get,
error_to_compat_str,
ExtractorError,
+ filter_dict,
float_or_none,
+ get_first,
extract_attributes,
get_element_by_attribute,
int_or_none,
@@ -82,6 +84,34 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
+ _INNERTUBE_CLIENTS = {
+ # mweb has 'ultralow' formats
+ # See: https://github.com/yt-dlp/yt-dlp/pull/557
+ 'mweb': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'MWEB',
+ 'clientVersion': '2.20241202.07.00',
+ # mweb previously did not require PO Token with this UA
+ 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
+ 'REQUIRE_PO_TOKEN': True,
+ 'SUPPORTS_COOKIES': True,
+ },
+ 'tv': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'TVHTML5',
+ 'clientVersion': '7.20241201.18.00',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
+ 'SUPPORTS_COOKIES': True,
+ },
+ }
+
def _login(self):
"""
Attempt to log in to YouTube.
@@ -321,19 +351,24 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'{0} {1} {2}'.format(time_now, self._SAPISID, origin).encode('utf-8')).hexdigest()
return 'SAPISIDHASH {0}_{1}'.format(time_now, sapisidhash)
- def _call_api(self, ep, query, video_id, fatal=True, headers=None):
+ def _call_api(self, ep, query, video_id, fatal=True, headers=None,
+ note='Downloading API JSON'):
data = self._DEFAULT_API_DATA.copy()
data.update(query)
real_headers = {'content-type': 'application/json'}
if headers:
real_headers.update(headers)
+ # was: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
+ api_key = self.get_param('youtube_innertube_key')
return self._download_json(
'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
- note='Downloading API JSON', errnote='Unable to download API page',
+ note=note, errnote='Unable to download API page',
data=json.dumps(data).encode('utf8'), fatal=fatal,
- headers=real_headers,
- query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
+ headers=real_headers, query=filter_dict({
+ 'key': api_key,
+ 'prettyPrint': 'false',
+ }))
def _extract_yt_initial_data(self, video_id, webpage):
return self._parse_json(
@@ -342,6 +377,22 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
video_id)
+ def _extract_visitor_data(self, *args):
+ """
+ Extract visitorData from an API response or ytcfg
+
+ Appears to be used to track session state
+ """
+ visitor_data = self.get_param('youtube_visitor_data')
+ if visitor_data:
+ return visitor_data
+
+ return get_first(
+ args, (('VISITOR_DATA',
+ ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
+ ('responseContext', 'visitorData')),
+ T(compat_str)))
+
def _extract_ytcfg(self, video_id, webpage):
return self._parse_json(
self._search_regex(
@@ -1957,6 +2008,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if sts:
pb_context['signatureTimestamp'] = sts
+ client = traverse_obj(self._INNERTUBE_CLIENTS, (
+ lambda _, v: not v.get('REQUIRE_PO_TOKEN')),
+ get_all=False)
+
query = {
'playbackContext': {
'contentPlaybackContext': pb_context,
@@ -1964,30 +2019,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'racyCheckOk': True,
},
'context': {
- 'client': {
- 'clientName': 'MWEB',
- 'clientVersion': '2.20241202.07.00',
- 'hl': 'en',
- 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
- 'timeZone': 'UTC',
- 'utcOffsetMinutes': 0,
- },
+ 'client': merge_dicts(
+ traverse_obj(client, ('INNERTUBE_CONTEXT', 'client')), {
+ 'hl': 'en',
+ 'timeZone': 'UTC',
+ 'utcOffsetMinutes': 0,
+ }),
},
'videoId': video_id,
}
- headers = {
- 'X-YouTube-Client-Name': '2',
- 'X-YouTube-Client-Version': '2.20241202.07.00',
- 'Origin': origin,
+
+ headers = merge_dicts({
'Sec-Fetch-Mode': 'navigate',
- 'User-Agent': query['context']['client']['userAgent'],
- }
+ 'Origin': origin,
+ # 'X-Goog-Visitor-Id': self._extract_visitor_data(ytcfg) or '',
+ }, traverse_obj(client, {
+ 'X-YouTube-Client-Name': 'INNERTUBE_CONTEXT_CLIENT_NAME',
+ 'X-YouTube-Client-Version': (
+ 'INNERTUBE_CONTEXT', 'client', 'clientVersion'),
+ 'User-Agent': (
+ 'INNERTUBE_CONTEXT', 'client', 'userAgent'),
+ }))
+
auth = self._generate_sapisidhash_header(origin)
if auth is not None:
headers['Authorization'] = auth
headers['X-Origin'] = origin
- player_response = self._call_api('player', query, video_id, fatal=False, headers=headers)
+ player_response = self._call_api(
+ 'player', query, video_id, fatal=False, headers=headers,
+ note=join_nonempty(
+ 'Downloading', traverse_obj(query, (
+ 'context', 'client', 'clientName')),
+ 'API JSON', delim=' '))
def is_agegated(playability):
if not isinstance(playability, dict):