diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2015-01-22 12:04:07 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2015-01-22 12:04:13 +0100 | 
| commit | b55ee18ff3a9642fe25a977e1152472877294493 (patch) | |
| tree | 7b7134729d44552b4156b6e904067e6e20b33ddc | |
| parent | e5763a7a7e630cfd94b8993a9592cb3243890e0c (diff) | |
[hearthisat] Add support for more high-quality download links
| -rw-r--r-- | youtube_dl/extractor/hearthisat.py | 49 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 8 | 
2 files changed, 45 insertions, 12 deletions
```diff
diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dl/extractor/hearthisat.py
index c1177a9c5..d83072c1a 100644
--- a/youtube_dl/extractor/hearthisat.py
+++ b/youtube_dl/extractor/hearthisat.py
@@ -4,10 +4,15 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_request
+from ..compat import (
+    compat_urllib_request,
+    compat_urlparse,
+)
 from ..utils import (
+    HEADRequest,
     str_to_int,
     urlencode_postdata,
+    urlhandle_detect_ext,
 )
 
 
@@ -16,10 +21,10 @@ class HearThisAtIE(InfoExtractor):
     _PLAYLIST_URL = 'https://hearthis.at/playlist.php'
     _TEST = {
         'url': 'https://hearthis.at/moofi/dr-kreep',
-        'md5': 'd594c573227a89f4256f0b03e68c80cc',
+        'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
         'info_dict': {
             'id': '150939',
-            'ext': 'mp3',
+            'ext': 'wav',
             'title': 'Moofi - Dr. Kreep',
             'thumbnail': 're:^https?://.*\.jpg$',
             'timestamp': 1421564134,
@@ -67,18 +72,38 @@ class HearThisAtIE(InfoExtractor):
         timestamp = str_to_int(self._search_regex(
             r'<span[^>]+class="calctime"[^>]+data-time="(\d+)', webpage, 'timestamp', fatal=False))
 
-        track_url = self._search_regex(
-            r'<a[^>]+data-mp3="([^"]+)"', webpage, 'track URL')
-
-        formats = [{
-            'format_id': 'mp3',
-            'url': track_url,
-            'vcodec': 'none',
-        }]
+        formats = []
+        mp3_url = self._search_regex(
+            r'(?s)<a class="player-link"\s+(?:[a-zA-Z0-9_:-]+="[^"]+"\s+)*?data-mp3="([^"]+)"',
+            webpage, 'title', fatal=False)
+        if mp3_url:
+            formats.append({
+                'format_id': 'mp3',
+                'vcodec': 'none',
+                'acodec': 'mp3',
+                'url': mp3_url,
+            })
+        download_path = self._search_regex(
+            r'<a class="[^"]*download_fct[^"]*"\s+href="([^"]+)"',
+            webpage, 'download URL', default=None)
+        if download_path:
+            download_url = compat_urlparse.urljoin(url, download_path)
+            ext_req = HEADRequest(download_url)
+            ext_handle = self._request_webpage(
+                ext_req, display_id, note='Determining extension')
+            ext = urlhandle_detect_ext(ext_handle)
+            formats.append({
+                'format_id': 'download',
+                'vcodec': 'none',
+                'ext': ext,
+                'url': download_url,
+                'preference': 2,  # Usually better quality
+            })
+        self._sort_formats(formats)
 
         return {
             'id': track_id,
-            'display-id': display_id,
+            'display_id': display_id,
             'title': title,
             'formats': formats,
             'thumbnail': thumbnail,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 7832ed87f..764474c33 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1612,6 +1612,14 @@ def urlhandle_detect_ext(url_handle):
     except AttributeError:  # Python < 3
         getheader = url_handle.info().getheader
 
+    cd = getheader('Content-Disposition')
+    if cd:
+        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+        if m:
+            e = determine_ext(m.group('filename'), default_ext=None)
+            if e:
+                return e
+
     return getheader('Content-Type').split("/")[1]
 
 
```
