diff options
author | bashonly <88596187+bashonly@users.noreply.github.com> | 2024-07-23 18:08:24 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-23 23:08:24 +0000 |
commit | 1a34a802f44a1dab8f642c79c3cc810e21541d3b (patch) | |
tree | ebb2a4c9acece5cf286e1e6d3e7d87195eaa0f81 | |
parent | a0a1bc3d8d8e3bb9a48a06e835815a0460e90e77 (diff) |
[ie/facebook] Fix extraction (#10531)
Closes #10532
Authored by: bashonly
-rw-r--r-- | yt_dlp/extractor/facebook.py | 24 |
1 files changed, 15 insertions, 9 deletions
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index a3ca291fc..6aba477a6 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -571,16 +571,21 @@ class FacebookIE(InfoExtractor):
                 # Formats larger than ~500MB will return error 403 unless chunk size is regulated
                 f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
 
-        def extract_relay_data(_filter):
-            return self._parse_json(self._search_regex(
-                rf'data-sjs>({{.*?{_filter}.*?}})</script>',
-                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
+        def yield_all_relay_data(_filter):
+            for relay_data in re.findall(rf'data-sjs>({{.*?{_filter}.*?}})</script>', webpage):
+                yield self._parse_json(relay_data, video_id, fatal=False) or {}
 
-        def extract_relay_prefetched_data(_filter):
-            return traverse_obj(extract_relay_data(_filter), (
-                'require', (None, (..., ..., ..., '__bbox', 'require')),
+        def extract_relay_data(_filter):
+            return next(filter(None, yield_all_relay_data(_filter)), {})
+
+        def extract_relay_prefetched_data(_filter, target_keys=None):
+            path = 'data'
+            if target_keys is not None:
+                path = lambda k, v: k == 'data' and any(target in v for target in variadic(target_keys))
+            return traverse_obj(yield_all_relay_data(_filter), (
+                ..., 'require', (None, (..., ..., ..., '__bbox', 'require')),
                 lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
-                ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
+                ..., ..., '__bbox', 'result', path, {dict}), get_all=False) or {}
 
         if not video_data:
             server_js_data = self._parse_json(self._search_regex([
@@ -591,7 +596,8 @@ class FacebookIE(InfoExtractor):
 
         if not video_data:
             data = extract_relay_prefetched_data(
-                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
+                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)',
+                target_keys=('video', 'event', 'nodes', 'node', 'mediaset'))
             if data:
                 entries = []
 