diff options
Diffstat (limited to 'youtube_dl/extractor')
| -rw-r--r-- | youtube_dl/extractor/common.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/gorillavid.py | 7 | ||||
| -rw-r--r-- | youtube_dl/extractor/hostingbulk.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/played.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/primesharetv.py | 7 | ||||
| -rw-r--r-- | youtube_dl/extractor/promptfile.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/shared.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/twitch.py | 36 | ||||
| -rw-r--r-- | youtube_dl/extractor/vimeo.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/vk.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/vodlocker.py | 7 | 
11 files changed, 31 insertions, 60 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index d859aea52..82f5de2d8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -705,6 +705,12 @@ class InfoExtractor(object):          return self._html_search_meta('twitter:player', html,                                        'twitter card player') +    @staticmethod +    def _form_hidden_inputs(html): +        return dict(re.findall( +            r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"', +            html)) +      def _sort_formats(self, formats, field_preference=None):          if not formats:              raise ExtractorError('No video formats found') diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index 6147596e4..aabf07a20 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -78,12 +78,7 @@ class GorillaVidIE(InfoExtractor):          if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:              raise ExtractorError('Video %s does not exist' % video_id, expected=True) -        fields = dict(re.findall(r'''(?x)<input\s+ -            type="hidden"\s+ -            name="([^"]+)"\s+ -            (?:id="[^"]+"\s+)? -            value="([^"]*)" -            ''', webpage)) +        fields = self._form_hidden_inputs(webpage)          if fields['op'] == 'download1':              countdown = int_or_none(self._search_regex( diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py index 704d0285d..63f579592 100644 --- a/youtube_dl/extractor/hostingbulk.py +++ b/youtube_dl/extractor/hostingbulk.py @@ -58,11 +58,7 @@ class HostingBulkIE(InfoExtractor):              r'<img src="([^"]+)".+?class="pic"',              webpage, 'thumbnail', fatal=False) -        fields = dict(re.findall(r'''(?x)<input\s+ -            type="hidden"\s+ -            name="([^"]+)"\s+ -            value="([^"]*)" -            ''', webpage)) +        fields = self._form_hidden_inputs(webpage)          request = compat_urllib_request.Request(url, urlencode_postdata(fields))          request.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index 45716c75d..9fe1524f2 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -38,9 +38,7 @@ class PlayedIE(InfoExtractor):          if m_error:              raise ExtractorError(m_error.group('msg'), expected=True) -        fields = re.findall( -            r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage) -        data = dict(fields) +        data = self._form_hidden_inputs(orig_webpage)          self._sleep(2, video_id) diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py index 01cc3d9ea..94c9fb2cb 100644 --- a/youtube_dl/extractor/primesharetv.py +++ b/youtube_dl/extractor/primesharetv.py @@ -31,12 +31,7 @@ class PrimeShareTVIE(InfoExtractor):          if '>File not exist<' in webpage:              raise ExtractorError('Video %s does not exist' % video_id, expected=True) -        fields = dict(re.findall(r'''(?x)<input\s+ -            type="hidden"\s+ -            name="([^"]+)"\s+ -            (?:id="[^"]+"\s+)? -            value="([^"]*)" -            ''', webpage)) +        fields = self._form_hidden_inputs(webpage)          headers = {              'Referer': url, diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index f536e6e6c..81a63c7fc 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -35,10 +35,7 @@ class PromptFileIE(InfoExtractor):              raise ExtractorError('Video %s does not exist' % video_id,                                   expected=True) -        fields = dict(re.findall(r'''(?x)type="hidden"\s+ -            name="(.+?)"\s+ -            value="(.*?)" -            ''', webpage)) +        fields = self._form_hidden_inputs(webpage)          post = compat_urllib_parse.urlencode(fields)          req = compat_urllib_request.Request(url, post)          req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 9f3e944e7..7fb68bc2d 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -35,8 +35,7 @@ class SharedIE(InfoExtractor):              raise ExtractorError(                  'Video %s does not exist' % video_id, expected=True) -        download_form = dict(re.findall( -            r'<input type="hidden" name="([^"]+)" value="([^"]*)"', webpage)) +        download_form = self._form_hidden_inputs(webpage)          request = compat_urllib_request.Request(              url, compat_urllib_parse.urlencode(download_form))          request.add_header('Content-Type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index b56ee2959..af2b798fb 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -22,8 +22,8 @@ class TwitchBaseIE(InfoExtractor):      _API_BASE = 'https://api.twitch.tv'      _USHER_BASE = 'http://usher.twitch.tv' -    _LOGIN_URL = 'https://secure.twitch.tv/user/login' -    _LOGIN_POST_URL = 'https://secure-login.twitch.tv/login' +    _LOGIN_URL = 'https://secure.twitch.tv/login' +    _LOGIN_POST_URL = 'https://passport.twitch.tv/authorize'      _NETRC_MACHINE = 'twitch'      def _handle_error(self, response): @@ -59,20 +59,12 @@ class TwitchBaseIE(InfoExtractor):          login_page = self._download_webpage(              self._LOGIN_URL, None, 'Downloading login page') -        authenticity_token = self._search_regex( -            r'<input name="authenticity_token" type="hidden" value="([^"]+)"', -            login_page, 'authenticity token') - -        login_form = { -            'utf8': '✓'.encode('utf-8'), -            'authenticity_token': authenticity_token, -            'redirect_on_login': '', -            'embed_form': 'false', -            'mp_source_action': 'login-button', -            'follow': '', -            'login': username, -            'password': password, -        } +        login_form = self._form_hidden_inputs(login_page) + +        login_form.update({ +            'login': username.encode('utf-8'), +            'password': password.encode('utf-8'), +        })          request = compat_urllib_request.Request(              self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) @@ -80,11 +72,15 @@ class TwitchBaseIE(InfoExtractor):          response = self._download_webpage(              request, None, 'Logging in as %s' % username) -        m = re.search( -            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response) -        if m: +        error_message = self._search_regex( +            r'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>', +            response, 'error message', default=None) +        if error_message:              raise ExtractorError( -                'Unable to login: %s' % m.group('msg').strip(), expected=True) +                'Unable to login. Twitch said: %s' % error_message, expected=True) + +        if '>Reset your password<' in response: +            self.report_warning('Twitch asks you to reset your password, go to https://secure.twitch.tv/reset/submit')      def _prefer_source(self, formats):          try: diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index cae90205d..d63c03183 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -452,11 +452,7 @@ class VimeoChannelIE(InfoExtractor):          password = self._downloader.params.get('videopassword', None)          if password is None:              raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True) -        fields = dict(re.findall(r'''(?x)<input\s+ -            type="hidden"\s+ -            name="([^"]+)"\s+ -            value="([^"]*)" -            ''', login_form)) +        fields = self._form_hidden_inputs(login_form)          token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')          fields['token'] = token          fields['password'] = password diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 23d153031..c0292095b 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -154,9 +154,7 @@ class VKIE(InfoExtractor):          login_page = self._download_webpage(              'https://vk.com', None, 'Downloading login page') -        login_form = dict(re.findall( -            r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"', -            login_page)) +        login_form = self._form_hidden_inputs(login_page)          login_form.update({              'email': username.encode('cp1251'), diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index 1c0966a79..431f4e2e3 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -28,12 +28,7 @@ class VodlockerIE(InfoExtractor):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        fields = dict(re.findall(r'''(?x)<input\s+ -            type="hidden"\s+ -            name="([^"]+)"\s+ -            (?:id="[^"]+"\s+)? -            value="([^"]*)" -            ''', webpage)) +        fields = self._form_hidden_inputs(webpage)          if fields['op'] == 'download1':              self._sleep(3, video_id)  # they do detect when requests happen too fast!  | 
