about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: kclauhk <78251477+kclauhk@users.noreply.github.com> 2023-12-25 06:43:35 +0800
committer: GitHub <noreply@github.com> 2023-12-24 23:43:35 +0100
commit: c39358a54bc6675ae0c50b81024e5a086e41656a (patch)
tree: 2a83c9548eee8efb92a89b3cb96632c08c7de7b5
parent: 1f8bd8eba82ba10ddb49ee7cc0be4540dab103d5 (diff)
[ie/Facebook] Fix Memories extraction (#8681)
- Support group /posts/ URLs
- Raise a proper error message if no formats are found

Closes #8669
Authored by: kclauhk
-rw-r--r-- yt_dlp/extractor/facebook.py | 25
1 file changed, 21 insertions, 4 deletions
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index 58162cc5f..a07a0d344 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -52,7 +52,7 @@ class FacebookIE(InfoExtractor):
)\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/|
- groups/[^/]+/permalink/|
+ groups/[^/]+/(?:permalink|posts)/|
watchparty/
)|
facebook:
@@ -233,6 +233,21 @@ class FacebookIE(InfoExtractor):
},
'skip': 'Requires logging in',
}, {
+ # data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
+ 'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
+ 'info_dict': {
+ 'id': '1569199726448814',
+ 'ext': 'mp4',
+ 'title': 'Pence MUST GO!',
+ 'description': 'Vickie Gentry shared a memory.',
+ 'timestamp': 1511548260,
+ 'upload_date': '20171124',
+ 'uploader': 'Vickie Gentry',
+ 'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 148.435,
+ },
+ }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,
}, {
@@ -612,9 +627,11 @@ class FacebookIE(InfoExtractor):
nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
attachments = traverse_obj(nodes, (
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
- ..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
+ ..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
+ 'attachment', {dict}))
for attachment in attachments:
- ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
+ ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
+ ('target', 'attachments', ..., 'styles', 'attachment', {dict}))
for n in ns:
parse_attachment(n)
parse_attachment(attachment)
@@ -637,7 +654,7 @@ class FacebookIE(InfoExtractor):
if len(entries) > 1:
return self.playlist_result(entries, video_id)
- video_info = entries[0]
+ video_info = entries[0] if entries else {'id': video_id}
webpage_info = extract_metadata(webpage)
# honor precise duration in video info
if video_info.get('duration'):