about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author Yen Chi Hsuan <yan12125@gmail.com> 2016-07-02 21:33:23 +0800
committer Yen Chi Hsuan <yan12125@gmail.com> 2016-07-02 21:58:07 +0800
commit fd6ca382628afbc4a229a15cd26552e226ac4536 (patch)
tree 322f0af5d222d97510e8a586c6ae834f28692d0c /youtube_dl/extractor
parent bdafd88da07046f91e0585f083dea7795096e5d7 (diff)
[facebook] Improve Facebook embedded detection
Related to #9938. Another example comes from 9834872bf63b4e03b66c5e3b8f306556e735d8c5.
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/facebook.py 15
-rw-r--r-- youtube_dl/extractor/generic.py 26
2 files changed, 37 insertions, 4 deletions
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 9b87b37ae..6eaa22d89 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor):
'only_matching': True,
}]
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
+ if mobj is not None:
+ return mobj.group('url')
+
+ # Facebook API embed
+ # see https://developers.facebook.com/docs/plugins/embedded-video-player
+ mobj = re.search(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\bfb-video\b[^\'"]*(?P=q1)[^>]+
+ data-href=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)''', webpage)
+ if mobj is not None:
+ return mobj.group('url')
+
def _login(self):
(useremail, password) = self._get_login_info()
if useremail is None:
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 9315b9e21..7212e0edd 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -66,6 +66,7 @@ from .theplatform import ThePlatformIE
from .vessel import VesselIE
from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
+from .facebook import FacebookIE
class GenericIE(InfoExtractor):
@@ -1260,6 +1261,24 @@ class GenericIE(InfoExtractor):
'uploader': 'TheAtlantic',
},
'add_ie': ['BrightcoveLegacy'],
+ },
+ # Facebook <iframe> embed
+ {
+ 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
+ 'info_dict': {
+ 'id': '599637780109885',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #599637780109885',
+ },
+ },
+ # Facebook API embed
+ {
+ 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
+ 'info_dict': {
+ 'id': '10153467542406923',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #10153467542406923',
+ },
}
]
@@ -1759,10 +1778,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
# Look for embedded Facebook player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Facebook')
+ facebook_url = FacebookIE._extract_url(webpage)
+ if facebook_url is not None:
+ return self.url_result(facebook_url, 'Facebook')
# Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)