From bfd973ece3369c593b5e82a88cc16de80088a73e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 1 Aug 2022 06:53:25 +0530 Subject: [extractors] Use new framework for existing embeds (#4307) `Brightcove` is difficult to migrate because its subclasses may depend on the signature of the current functions. So it is left as-is for now. Note: Tests have not been migrated --- yt_dlp/extractor/facebook.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'yt_dlp/extractor/facebook.py') diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 5b34f3bff..d434b359a 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -57,6 +57,13 @@ class FacebookIE(InfoExtractor): ) (?P<id>[0-9]+) ''' + _EMBED_REGEX = [ + r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1', + # Facebook API embed https://developers.facebook.com/docs/plugins/embedded-video-player + r'''(?x)<div[^>]+ + class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+ + data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', + ] _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' _NETRC_MACHINE = 'facebook' @@ -311,21 +318,6 @@ class FacebookIE(InfoExtractor): 'graphURI': '/api/graphql/' } - @staticmethod - def _extract_urls(webpage): - urls = [] - for mobj in re.finditer( - r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1', - webpage): - urls.append(mobj.group('url')) - # Facebook API embed - # see https://developers.facebook.com/docs/plugins/embedded-video-player - for mobj in re.finditer(r'''(?x)<div[^>]+ - class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+ - data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage): -
urls.append(mobj.group('url')) - return urls - def _perform_login(self, username, password): login_page_req = sanitized_Request(self._LOGIN_URL) self._set_cookie('facebook.com', 'locale', 'en_US') -- cgit v1.2.3