diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-08-24 01:31:35 +0200 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-08-24 01:31:35 +0200 | 
| commit | d769be6c96ebca432a076670a0ed4e72aec101c1 (patch) | |
| tree | b48af039ea19d4e9bf990a042a45e41eb2f5c5b8 | |
| parent | 00558d94145f97c644e66ec086fa9b9d8c58280f (diff) | |
[grooveshark,http] Make HTTP POST downloads work
| -rw-r--r-- | youtube_dl/downloader/http.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/grooveshark.py | 65 | 
3 files changed, 42 insertions, 41 deletions
```diff
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index f79e6a995..d01d1897e 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -27,8 +27,16 @@ class HttpFD(FileDownloader):
             headers['Youtubedl-user-agent'] = info_dict['user_agent']
         if 'http_referer' in info_dict:
             headers['Referer'] = info_dict['http_referer']
-        basic_request = compat_urllib_request.Request(url, None, headers)
-        request = compat_urllib_request.Request(url, None, headers)
+        add_headers = info_dict.get('http_headers')
+        if add_headers:
+            headers.update(add_headers)
+        data = info_dict.get('http_post_data')
+        http_method = info_dict.get('http_method')
+        basic_request = compat_urllib_request.Request(url, data, headers)
+        request = compat_urllib_request.Request(url, data, headers)
+        if http_method is not None:
+            basic_request.get_method = lambda: http_method
+            request.get_method = lambda: http_method
 
         is_test = self.params.get('test', False)
 
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2e6eeac08..9d85a538c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -84,6 +84,12 @@ class InfoExtractor(object):
                                  format, irrespective of the file format.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
+                    * http_referer  HTTP Referer header value to set.
+                    * http_method  HTTP method to use for the download.
+                    * http_headers  A dictionary of additional HTTP headers
+                                 to add to the request.
+                    * http_post_data  Additional data to send with a POST
+                                 request.
     url:            Final video URL.
     ext:            Video filename extension.
     format:         The video format, defaults to ext (used for --get-format)
diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py
index 77c5a9172..1f2c65afe 100644
--- a/youtube_dl/extractor/grooveshark.py
+++ b/youtube_dl/extractor/grooveshark.py
@@ -12,7 +12,7 @@ from .common import InfoExtractor
 from ..utils import ExtractorError, compat_urllib_request, compat_html_parser
 
 from ..utils import compat_urlparse
-urlparse = compat_urlparse.urlparse
+
 urlunparse = compat_urlparse.urlunparse
 urldefrag = compat_urlparse.urldefrag
 
@@ -52,7 +52,7 @@ class GroovesharkIE(InfoExtractor):
             'id': '6SS1DW',
             'title': 'Jolene (Tenth Key Remix ft. Will Sessions)',
             'ext': 'mp3',
-            'duration': 227
+            'duration': 227,
         }
     }
 
@@ -60,7 +60,7 @@ class GroovesharkIE(InfoExtractor):
     do_bootstrap_request = True
 
     def _parse_target(self, target):
-        uri = urlparse(target)
+        uri = compat_urlparse.urlparse(target)
         hash = uri.fragment[1:].split('?')[0]
         token = basename(hash.rstrip('/'))
         return (uri, hash, token)
@@ -123,22 +123,36 @@ class GroovesharkIE(InfoExtractor):
 
     def _get_playerpage(self, target):
         (_, _, token) = self._parse_target(target)
-        res = self._download_webpage(
+
+        webpage = self._download_webpage(
             target, token,
             note='Downloading player page',
             errnote='Unable to download player page',
             fatal=False)
 
-        if res is not None:
-            o = GroovesharkHtmlParser.extract_object_tags(res)
-            return (res, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
+        if webpage is not None:
+            # Search (for example German) error message
+            error_msg = self._html_search_regex(
+                r'<div id="content">\s*<h2>(.*?)</h2>', webpage,
+                'error message', default=None)
+            if error_msg is not None:
+                error_msg = error_msg.replace('\n', ' ')
+                raise ExtractorError('Grooveshark said: %s' % error_msg)
 
-        return (res, None)
+        if webpage is not None:
+            o = GroovesharkHtmlParser.extract_object_tags(webpage)
+            return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
+
+        return (webpage, None)
+
+    def _real_initialize(self):
+        self.ts = int(time.time() * 1000)  # timestamp in millis
 
     def _real_extract(self, url):
         (target_uri, _, token) = self._parse_target(url)
 
         # 1. Fill cookiejar by making a request to the player page
+        swf_referer = None
         if self.do_playerpage_request:
             (_, player_objs) = self._get_playerpage(url)
             if player_objs is not None:
@@ -162,11 +176,10 @@ class GroovesharkIE(InfoExtractor):
             'Content-Length': len(post_data),
             'Content-Type': 'application/x-www-form-urlencoded'
         }
-
-        if 'swf_referer' in locals():
+        if swf_referer is not None:
             headers['Referer'] = swf_referer
 
-        info_dict = {
+        return {
             'id': token,
             'title': meta['song']['Name'],
             'http_method': 'POST',
@@ -174,32 +187,6 @@ class GroovesharkIE(InfoExtractor):
             'ext': 'mp3',
             'format': 'mp3 audio',
             'duration': duration,
-
-            # various ways of supporting the download request.
-            # remove keys unnecessary to the eventual post implementation
-            'post_data': post_data,
-            'post_dict': post_dict,
-            'headers': headers
+            'http_post_data': post_data,
+            'http_headers': headers,
         }
-
-        if 'swf_referer' in locals():
-            info_dict['http_referer'] = swf_referer
-
-        return info_dict
-
-    def _real_initialize(self):
-        self.ts = int(time.time() * 1000)  # timestamp in millis
-
-    def _download_json(self, url_or_request, video_id,
-                       note=u'Downloading JSON metadata',
-                       errnote=u'Unable to download JSON metadata',
-                       fatal=True,
-                       transform_source=None):
-        try:
-            out = super(GroovesharkIE, self)._download_json(
-                url_or_request, video_id, note, errnote, transform_source)
-            return out
-        except ExtractorError as ee:
-            if fatal:
-                raise ee
-        return None
```
