diff --stat
 youtube_dl/extractor/common.py  | 30 ++++++++++++++-----
 youtube_dl/extractor/youtube.py | 69 +++++++++++++++++++-------------------
 2 files changed, 58 insertions(+), 41 deletions(-)
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 92a0c5050..534908a2b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -154,27 +154,38 @@ class InfoExtractor(object):      def IE_NAME(self):          return type(self).__name__[:-2] -    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None): +    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):          """ Returns the response handle """          if note is None:              self.report_download_webpage(video_id)          elif note is not False: -            self.to_screen(u'%s: %s' % (video_id, note)) +            if video_id is None: +                self.to_screen(u'%s' % (note,)) +            else: +                self.to_screen(u'%s: %s' % (video_id, note))          try:              return self._downloader.urlopen(url_or_request)          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:              if errnote is None:                  errnote = u'Unable to download webpage' -            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err) +            errmsg = u'%s: %s' % (errnote, compat_str(err)) +            if fatal: +                raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) +            else: +                self._downloader.report_warning(errmsg) +                return False -    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None): +    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):          """ Returns a tuple (page content as string, URL handle) """          # Strip hashes from the URL (#1038)          if isinstance(url_or_request, (compat_str, str)):              url_or_request = url_or_request.partition('#')[0] -        urlh = self._request_webpage(url_or_request, video_id, 
note, errnote) +        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal) +        if urlh is False: +            assert not fatal +            return False          content_type = urlh.headers.get('Content-Type', '')          webpage_bytes = urlh.read()          m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) @@ -209,9 +220,14 @@ class InfoExtractor(object):          content = webpage_bytes.decode(encoding, 'replace')          return (content, urlh) -    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None): +    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):          """ Returns the data of the page as a string """ -        return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] +        res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) +        if res is False: +            return res +        else: +            content, _ = res +            return content      def _download_xml(self, url_or_request, video_id,                        note=u'Downloading XML', errnote=u'Unable to download XML'): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1d211c450..7f7508c74 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -42,19 +42,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):      # If True it will raise an error if no login info is provided      _LOGIN_REQUIRED = False -    def report_lang(self): -        """Report attempt to set language.""" -        self.to_screen(u'Setting language') -      def _set_language(self): -        request = compat_urllib_request.Request(self._LANG_URL) -        try: -            self.report_lang() -            self._download_webpage(self._LANG_URL, None, False) -        except ExtractorError as err: -            self._downloader.report_warning(u'unable to set language: %s' % 
compat_str(err.cause)) -            return False -        return True +        return bool(self._download_webpage( +            self._LANG_URL, None, +            note=u'Setting language', errnote='unable to set language', +            fatal=False))      def _login(self):          (username, password) = self._get_login_info() @@ -64,8 +56,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):                  raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)              return False -        login_page = self._download_webpage(self._LOGIN_URL, None, False, -            u'Unable to fetch login page') +        login_page = self._download_webpage( +            self._LOGIN_URL, None, +            note=u'Downloading login page', +            errnote=u'unable to fetch login page', fatal=False) +        if login_page is False: +            return          galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',                                    login_page, u'Login GALX parameter') @@ -95,26 +91,28 @@ class YoutubeBaseInfoExtractor(InfoExtractor):          # chokes on unicode          login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())          login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') -        request = compat_urllib_request.Request(self._LOGIN_URL, login_data) -        try: -            self.report_login() -            login_results = self._download_webpage(request, None, False) -            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: -                self._downloader.report_warning(u'unable to log in: bad username or password') -                return False -        except ExtractorError as err: -            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err.cause)) + +        req = compat_urllib_request.Request(self._LOGIN_URL, login_data) +        login_results = 
self._download_webpage( +            req, None, +            note=u'Logging in', errnote=u'unable to log in', fatal=False) +        if login_results is False: +            return False +        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: +            self._downloader.report_warning(u'unable to log in: bad username or password')              return False          return True      def _confirm_age(self):          age_form = { -                'next_url':     '/', -                'action_confirm':   'Confirm', -                } -        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) -        self.report_age_confirmation() -        self._download_webpage(request, None, False, u'Unable to confirm age') +            'next_url': '/', +            'action_confirm': 'Confirm', +        } +        req = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) + +        self._download_webpage( +            req, None, +            note=u'Confirming age', errnote=u'Unable to confirm age')          return True      def _real_initialize(self): @@ -1736,11 +1734,14 @@ class YoutubeSearchIE(SearchInfoExtractor):          while (50 * pagenum) < limit:              result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1) -            data = self._download_webpage(result_url, u'query "%s"' % query, -                u'Downloading page %s' % pagenum, u'Unable to download API page') -            api_response = json.loads(data)['data'] - -            if not 'items' in api_response: +            data_json = self._download_webpage( +                result_url, video_id=u'query "%s"' % query, +                note=u'Downloading page %s' % (pagenum + 1), +                errnote=u'Unable to download API page') +            data = json.loads(data_json) +            api_response = data['data'] + +            if 'items' not in api_response:              
                 raise ExtractorError(u'[youtube] No video results')
 
             new_ids = list(video['id'] for video in api_response['items'])