diff options
Diffstat (limited to 'youtube_dl/extractor/facebook.py')
| -rw-r--r-- | youtube_dl/extractor/facebook.py | 29 | 
1 files changed, 21 insertions, 8 deletions
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index f0e575320..e17bb9aea 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -9,7 +9,7 @@ from ..compat import (      compat_http_client,      compat_str,      compat_urllib_error, -    compat_urllib_parse, +    compat_urllib_parse_unquote,      compat_urllib_request,  )  from ..utils import ( @@ -24,8 +24,12 @@ class FacebookIE(InfoExtractor):      _VALID_URL = r'''(?x)          https?://(?:\w+\.)?facebook\.com/          (?:[^#]*?\#!/)? -        (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?) -        (?:v|video_id)=(?P<id>[0-9]+) +        (?: +            (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?) +            (?:v|video_id)=| +            [^/]+/videos/(?:[^/]+/)? +        ) +        (?P<id>[0-9]+)          (?:.*)'''      _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'      _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' @@ -46,10 +50,19 @@ class FacebookIE(InfoExtractor):              'id': '274175099429670',              'ext': 'mp4',              'title': 'Facebook video #274175099429670', -        } +        }, +        'expected_warnings': [ +            'title' +        ]      }, {          'url': 'https://www.facebook.com/video.php?v=10204634152394104',          'only_matching': True, +    }, { +        'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf', +        'only_matching': True, +    }, { +        'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater', +        'only_matching': True,      }]      def _login(self): @@ -123,7 +136,7 @@ class FacebookIE(InfoExtractor):              else:                  raise ExtractorError('Cannot parse data')          data = dict(json.loads(m.group(1))) -        params_raw = compat_urllib_parse.unquote(data['params']) +        params_raw = compat_urllib_parse_unquote(data['params'])          params = json.loads(params_raw)          video_data = params['video_data'][0] @@ -139,12 +152,12 @@ class FacebookIE(InfoExtractor):              raise ExtractorError('Cannot find video formats')          video_title = self._html_search_regex( -            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title', -            fatal=False) +            r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title', +            default=None)          if not video_title:              video_title = self._html_search_regex(                  r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>', -                webpage, 'alternative title', default=None) +                webpage, 'alternative title', fatal=False)              video_title = limit_length(video_title, 80)          if not video_title:              video_title = 'Facebook video #%s' % video_id  | 
