diff options

 youtube_dl/extractor/cspan.py    | 13
 youtube_dl/extractor/facebook.py | 20
 2 files changed, 25 insertions(+), 8 deletions(-)
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index 7b685d157..b3ee67018 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -58,18 +58,23 @@ class CSpanIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        video_type = None
         webpage = self._download_webpage(url, video_id)
-        matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage)
-        if matches:
+        # We first look for clipid, because clipprog always appears before
+        patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
+        results = list(filter(None, (re.search(p, webpage) for p in patterns)))
+        if results:
+            matches = results[0]
             video_type, video_id = matches.groups()
-            if video_type == 'prog':
-                video_type = 'program'
+            video_type = 'clip' if video_type == 'id' else 'program'
         else:
             senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
             if senate_isvp_url:
                 title = self._og_search_title(webpage)
                 surl = smuggle_url(senate_isvp_url, {'force_title': title})
                 return self.url_result(surl, 'SenateISVP', video_id, title)
+        if video_type is None or video_id is None:
+            raise ExtractorError('unable to find video id and type')
 
         def get_text_attr(d, attr):
             return d.get(attr, {}).get('#text')
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 39c481068..5e43f2359 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -74,7 +74,7 @@ class FacebookIE(InfoExtractor):
             return
 
         login_page_req = sanitized_Request(self._LOGIN_URL)
-        login_page_req.add_header('Cookie', 'locale=en_US')
+        self._set_cookie('facebook.com', 'locale', 'en_US')
         login_page = self._download_webpage(login_page_req, None,
                                             note='Downloading login page',
                                             errnote='Unable to download login page')
@@ -100,13 +100,25 @@ class FacebookIE(InfoExtractor):
             login_results = self._download_webpage(request, None,
                                                    note='Logging in', errnote='unable to fetch login page')
             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
+                error = self._html_search_regex(
+                    r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
+                    login_results, 'login error', default=None, group='error')
+                if error:
+                    raise ExtractorError('Unable to login: %s' % error, expected=True)
                 self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                 return
 
+            fb_dtsg = self._search_regex(
+                r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
+            h = self._search_regex(
+                r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)
+
+            if not fb_dtsg or not h:
+                return
+
             check_form = {
-                'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
-                'h': self._search_regex(
-                    r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
+                'fb_dtsg': fb_dtsg,
+                'h': h,
                 'name_action_selected': 'dont_save',
             }
             check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
