diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-07-02 21:33:23 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-07-02 21:58:07 +0800 | 
| commit | fd6ca382628afbc4a229a15cd26552e226ac4536 (patch) | |
| tree | 322f0af5d222d97510e8a586c6ae834f28692d0c | |
| parent | bdafd88da07046f91e0585f083dea7795096e5d7 (diff) | |
[facebook] Improve Facebook embedded detection
Related to #9938.
Another example comes from commit 9834872bf63b4e03b66c5e3b8f306556e735d8c5.
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | youtube_dl/extractor/facebook.py | 15 |
| -rw-r--r-- | youtube_dl/extractor/generic.py | 26 |
2 files changed, 37 insertions, 4 deletions
```diff
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 9b87b37ae..6eaa22d89 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
+        if mobj is not None:
+            return mobj.group('url')
+
+        # Facebook API embed
+        # see https://developers.facebook.com/docs/plugins/embedded-video-player
+        mobj = re.search(r'''(?x)<div[^>]+
+                class=(?P<q1>[\'"])[^\'"]*\bfb-video\b[^\'"]*(?P=q1)[^>]+
+                data-href=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)''', webpage)
+        if mobj is not None:
+            return mobj.group('url')
+
     def _login(self):
         (useremail, password) = self._get_login_info()
         if useremail is None:
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 9315b9e21..7212e0edd 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -66,6 +66,7 @@ from .theplatform import ThePlatformIE
 from .vessel import VesselIE
 from .kaltura import KalturaIE
 from .eagleplatform import EaglePlatformIE
+from .facebook import FacebookIE
 
 
 class GenericIE(InfoExtractor):
@@ -1260,6 +1261,24 @@ class GenericIE(InfoExtractor):
                 'uploader': 'TheAtlantic',
             },
             'add_ie': ['BrightcoveLegacy'],
+        },
+        # Facebook <iframe> embed
+        {
+            'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
+            'info_dict': {
+                'id': '599637780109885',
+                'ext': 'mp4',
+                'title': 'Facebook video #599637780109885',
+            },
+        },
+        # Facebook API embed
+        {
+            'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
+            'info_dict': {
+                'id': '10153467542406923',
+                'ext': 'mp4',
+                'title': 'Facebook video #10153467542406923',
+            },
         }
     ]
 
@@ -1759,10 +1778,9 @@ class GenericIE(InfoExtractor):
             return self.url_result(mobj.group('url'))
 
         # Look for embedded Facebook player
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
-        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'Facebook')
+        facebook_url = FacebookIE._extract_url(webpage)
+        if facebook_url is not None:
+            return self.url_result(facebook_url, 'Facebook')
 
         # Look for embedded VK player
         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
```
