diff options
| -rw-r--r-- | youtube_dl/extractor/soundcloud.py | 93 | 
1 files changed, 25 insertions, 68 deletions
| diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index a1372d389..ff6be0b54 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -27,6 +27,7 @@ from ..utils import (      unified_timestamp,      update_url_query,      url_or_none, +    urlhandle_detect_ext,  ) @@ -96,7 +97,7 @@ class SoundcloudIE(InfoExtractor):                  'repost_count': int,              }          }, -        # not streamable song, preview +        # geo-restricted          {              'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',              'info_dict': { @@ -108,17 +109,13 @@ class SoundcloudIE(InfoExtractor):                  'uploader_id': '9615865',                  'timestamp': 1337635207,                  'upload_date': '20120521', -                'duration': 30, +                'duration': 227.155,                  'license': 'all-rights-reserved',                  'view_count': int,                  'like_count': int,                  'comment_count': int,                  'repost_count': int,              }, -            'params': { -                # rtmp -                'skip_download': True, -            },          },          # private link          { @@ -229,7 +226,6 @@ class SoundcloudIE(InfoExtractor):                  'skip_download': True,              },          }, -        # not available via api.soundcloud.com/i1/tracks/id/streams          {              'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',              'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7', @@ -250,11 +246,9 @@ class SoundcloudIE(InfoExtractor):                  'comment_count': int,                  'repost_count': int,              }, -            'expected_warnings': ['Unable to download JSON metadata'],          }      ] -    _API_BASE = 'https://api.soundcloud.com/'      _API_V2_BASE = 'https://api-v2.soundcloud.com/'      _BASE_URL = 'https://soundcloud.com/'      _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' @@ -316,10 +310,9 @@ class SoundcloudIE(InfoExtractor):      def _resolv_url(cls, url):          return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url -    def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2): +    def _extract_info_dict(self, info, full_title=None, secret_token=None):          track_id = compat_str(info['id'])          title = info['title'] -        track_base_url = self._API_BASE + 'tracks/%s' % track_id          format_urls = set()          formats = [] @@ -328,21 +321,22 @@ class SoundcloudIE(InfoExtractor):              query['secret_token'] = secret_token          if info.get('downloadable') and info.get('has_downloads_left'): -            format_url = update_url_query( -                info.get('download_url') or track_base_url + '/download', query) -            format_urls.add(format_url) -            if version == 2: -                v1_info = self._download_json( -                    track_base_url, track_id, query=query, fatal=False) or {} -            else: -                v1_info = info -            formats.append({ -                'format_id': 'download', -                'ext': v1_info.get('original_format') or 'mp3', -                'filesize': int_or_none(v1_info.get('original_content_size')), -                'url': format_url, -                'preference': 10, -            }) +            download_url = update_url_query( +                self._API_V2_BASE + 'tracks/' + track_id + '/download', query) +            redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri') +            if redirect_url: +                urlh = self._request_webpage( +                    HEADRequest(redirect_url), track_id, fatal=False) +                if urlh: +                    format_url = urlh.geturl() +                    format_urls.add(format_url) +                    formats.append({ +                        'format_id': 'download', +                        'ext': urlhandle_detect_ext(urlh) or 'mp3', +                        'filesize': int_or_none(urlh.headers.get('Content-Length')), +                        'url': format_url, +                        'preference': 10, +                    })          def invalid_url(url):              return not url or url in format_urls @@ -406,42 +400,11 @@ class SoundcloudIE(InfoExtractor):              }, 'http' if protocol == 'progressive' else protocol,                  t.get('snipped') or '/preview/' in format_url) -        if not formats: -            # Old API, does not work for some tracks (e.g. -            # https://soundcloud.com/giovannisarani/mezzo-valzer) -            # and might serve preview URLs (e.g. -            # http://www.soundcloud.com/snbrn/ele) -            format_dict = self._download_json( -                track_base_url + '/streams', track_id, -                'Downloading track url', query=query, fatal=False) or {} - -            for key, stream_url in format_dict.items(): -                if invalid_url(stream_url): -                    continue -                format_urls.add(stream_url) -                mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key) -                if mobj: -                    protocol, ext, abr = mobj.groups() -                    add_format({ -                        'abr': abr, -                        'ext': ext, -                        'url': stream_url, -                    }, protocol) - -        if not formats: -            # We fallback to the stream_url in the original info, this -            # cannot be always used, sometimes it can give an HTTP 404 error -            urlh = self._request_webpage( -                HEADRequest(info.get('stream_url') or track_base_url + '/stream'), -                track_id, query=query, fatal=False) -            if urlh: -                stream_url = urlh.geturl() -                if not invalid_url(stream_url): -                    add_format({'url': stream_url}, 'http') -          for f in formats:              f['vcodec'] = 'none' +        if not formats and info.get('policy') == 'BLOCK': +            self.raise_geo_restricted()          self._sort_formats(formats)          user = info.get('user') or {} @@ -511,16 +474,10 @@ class SoundcloudIE(InfoExtractor):                  resolve_title += '/%s' % token              info_json_url = self._resolv_url(self._BASE_URL + resolve_title) -        version = 2          info = self._download_json( -            info_json_url, full_title, 'Downloading info JSON', query=query, fatal=False) -        if not info: -            info = self._download_json( -                info_json_url.replace(self._API_V2_BASE, self._API_BASE), -                full_title, 'Downloading info JSON', query=query) -            version = 1 - -        return self._extract_info_dict(info, full_title, token, version) +            info_json_url, full_title, 'Downloading info JSON', query=query) + +        return self._extract_info_dict(info, full_title, token)  class SoundcloudPlaylistBaseIE(SoundcloudIE): | 
