aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRemita Amine <remitamine@gmail.com>2019-10-27 17:52:46 +0100
committerRemita Amine <remitamine@gmail.com>2019-10-27 17:52:46 +0100
commit548c395716b1d5aa215e526fcb052a03926c1573 (patch)
tree221b0549320cda129b5b7d69534f28049bb6ac2a
parent0b98f3a7517601b7d2aabc789997016b9c3c24f2 (diff)
downloadyoutube-dl-548c395716b1d5aa215e526fcb052a03926c1573.tar.xz
[soundcloud] improve extraction
- improve format extraction(closes #22123) - extract uploader_id and uploader_url(closes #21916) - extract all known thumbnails(closes #19071)(closes #20659) - fix extration for private playlists(closes #20976) - add support for playlist embeds(#20976) - skip preview formats(closes #22806)
-rw-r--r--youtube_dl/extractor/extractors.py1
-rw-r--r--youtube_dl/extractor/generic.py6
-rw-r--r--youtube_dl/extractor/soundcloud.py499
3 files changed, 249 insertions, 257 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index a8fe0de1a..388c1ebe6 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1033,6 +1033,7 @@ from .snotr import SnotrIE
from .sohu import SohuIE
from .sonyliv import SonyLIVIE
from .soundcloud import (
+ SoundcloudEmbedIE,
SoundcloudIE,
SoundcloudSetIE,
SoundcloudUserIE,
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index f66cae0eb..1c0780e98 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -80,7 +80,7 @@ from .theplatform import ThePlatformIE
from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
-from .soundcloud import SoundcloudIE
+from .soundcloud import SoundcloudEmbedIE
from .tunein import TuneInBaseIE
from .vbox7 import Vbox7IE
from .dbtv import DBTVIE
@@ -2749,9 +2749,9 @@ class GenericIE(InfoExtractor):
return self.url_result(myvi_url)
# Look for embedded soundcloud player
- soundcloud_urls = SoundcloudIE._extract_urls(webpage)
+ soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
if soundcloud_urls:
- return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+ return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
# Look for tunein player
tunein_urls = TuneInBaseIE._extract_urls(webpage)
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 05538f3d6..875b9d887 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -11,14 +11,13 @@ from .common import (
from ..compat import (
compat_str,
compat_urlparse,
- compat_urllib_parse_urlencode,
)
from ..utils import (
ExtractorError,
float_or_none,
+ HEADRequest,
int_or_none,
KNOWN_EXTENSIONS,
- merge_dicts,
mimetype2ext,
str_or_none,
try_get,
@@ -28,6 +27,20 @@ from ..utils import (
)
+class SoundcloudEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?url=(?P<id>.*)'
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [m.group('url') for m in re.finditer(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ return self.url_result(compat_urlparse.parse_qs(
+ compat_urlparse.urlparse(url).query)['url'][0])
+
+
class SoundcloudIE(InfoExtractor):
"""Information extractor for soundcloud.com
To access the media, the uid of the song and a stream token
@@ -44,9 +57,8 @@ class SoundcloudIE(InfoExtractor):
(?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
- |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
+ |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
(?:/?\?secret_token=(?P<secret_token>[^&]+))?)
- |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
)
'''
IE_NAME = 'soundcloud'
@@ -60,6 +72,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
'uploader': 'E.T. ExTerrestrial Music',
+ 'uploader_id': '1571244',
'timestamp': 1349920598,
'upload_date': '20121011',
'duration': 143.216,
@@ -79,6 +92,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept',
+ 'uploader_id': '9615865',
'timestamp': 1337635207,
'upload_date': '20120521',
'duration': 30,
@@ -92,6 +106,7 @@ class SoundcloudIE(InfoExtractor):
# rtmp
'skip_download': True,
},
+ 'skip': 'Preview',
},
# private link
{
@@ -103,6 +118,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Youtube - Dl Test Video \'\' Ä↭',
'description': 'test chars: \"\'/\\ä↭',
'uploader': 'jaimeMF',
+ 'uploader_id': '69767071',
'timestamp': 1386604920,
'upload_date': '20131209',
'duration': 9.927,
@@ -123,6 +139,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Youtube - Dl Test Video \'\' Ä↭',
'description': 'test chars: \"\'/\\ä↭',
'uploader': 'jaimeMF',
+ 'uploader_id': '69767071',
'timestamp': 1386604920,
'upload_date': '20131209',
'duration': 9.927,
@@ -143,6 +160,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Bus Brakes',
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
'uploader': 'oddsamples',
+ 'uploader_id': '73680509',
'timestamp': 1389232924,
'upload_date': '20140109',
'duration': 17.346,
@@ -163,6 +181,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
'uploader': 'Ori Uplift Music',
+ 'uploader_id': '12563093',
'timestamp': 1504206263,
'upload_date': '20170831',
'duration': 7449.096,
@@ -183,6 +202,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Sideways (Prod. Mad Real)',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'uploader': 'garyvee',
+ 'uploader_id': '2366352',
'timestamp': 1488152409,
'upload_date': '20170226',
'duration': 207.012,
@@ -207,6 +227,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Mezzo Valzer',
'description': 'md5:4138d582f81866a530317bae316e8b61',
'uploader': 'Giovanni Sarani',
+ 'uploader_id': '3352531',
'timestamp': 1551394171,
'upload_date': '20190228',
'duration': 180.157,
@@ -221,114 +242,81 @@ class SoundcloudIE(InfoExtractor):
}
]
+ _API_BASE = 'https://api.soundcloud.com/'
+ _API_V2_BASE = 'https://api-v2.soundcloud.com/'
+ _BASE_URL = 'https://soundcloud.com/'
_CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI'
-
- @staticmethod
- def _extract_urls(webpage):
- return [m.group('url') for m in re.finditer(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
- webpage)]
+ _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
+
+ _ARTWORK_MAP = {
+ 'mini': 16,
+ 'tiny': 20,
+ 'small': 32,
+ 'badge': 47,
+ 't67x67': 67,
+ 'large': 100,
+ 't300x300': 300,
+ 'crop': 400,
+ 't500x500': 500,
+ 'original': 0,
+ }
@classmethod
def _resolv_url(cls, url):
- return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
+ return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url + '&client_id=' + cls._CLIENT_ID
- def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
+ def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2):
track_id = compat_str(info['id'])
title = info['title']
- name = full_title or track_id
- if quiet:
- self.report_extraction(name)
- thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
- if isinstance(thumbnail, compat_str):
- thumbnail = thumbnail.replace('-large', '-t500x500')
- username = try_get(info, lambda x: x['user']['username'], compat_str)
-
- def extract_count(key):
- return int_or_none(info.get('%s_count' % key))
-
- like_count = extract_count('favoritings')
- if like_count is None:
- like_count = extract_count('likes')
-
- result = {
- 'id': track_id,
- 'uploader': username,
- 'timestamp': unified_timestamp(info.get('created_at')),
- 'title': title,
- 'description': info.get('description'),
- 'thumbnail': thumbnail,
- 'duration': float_or_none(info.get('duration'), 1000),
- 'webpage_url': info.get('permalink_url'),
- 'license': info.get('license'),
- 'view_count': extract_count('playback'),
- 'like_count': like_count,
- 'comment_count': extract_count('comment'),
- 'repost_count': extract_count('reposts'),
- 'genre': info.get('genre'),
- }
+ track_base_url = self._API_BASE + 'tracks/%s' % track_id
format_urls = set()
formats = []
query = {'client_id': self._CLIENT_ID}
- if secret_token is not None:
+ if secret_token:
query['secret_token'] = secret_token
- if info.get('downloadable', False):
- # We can build a direct link to the song
+
+ if info.get('downloadable'):
format_url = update_url_query(
- 'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
+ info.get('download_url') or track_base_url + '/download', query)
format_urls.add(format_url)
+ if version == 2:
+ v1_info = self._download_json(
+ track_base_url, track_id, query=query, fatal=False) or {}
+ else:
+ v1_info = info
formats.append({
'format_id': 'download',
- 'ext': info.get('original_format', 'mp3'),
+ 'ext': v1_info.get('original_format') or 'mp3',
+ 'filesize': int_or_none(v1_info.get('original_content_size')),
'url': format_url,
- 'vcodec': 'none',
'preference': 10,
})
- # Old API, does not work for some tracks (e.g.
- # https://soundcloud.com/giovannisarani/mezzo-valzer)
- format_dict = self._download_json(
- 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
- track_id, 'Downloading track url', query=query, fatal=False)
-
- if format_dict:
- for key, stream_url in format_dict.items():
- if stream_url in format_urls:
- continue
- format_urls.add(stream_url)
- ext, abr = 'mp3', None
- mobj = re.search(r'_([^_]+)_(\d+)_url', key)
- if mobj:
- ext, abr = mobj.groups()
- abr = int(abr)
- if key.startswith('http'):
- stream_formats = [{
- 'format_id': key,
- 'ext': ext,
- 'url': stream_url,
- }]
- elif key.startswith('rtmp'):
- # The url doesn't have an rtmp app, we have to extract the playpath
- url, path = stream_url.split('mp3:', 1)
- stream_formats = [{
- 'format_id': key,
- 'url': url,
- 'play_path': 'mp3:' + path,
- 'ext': 'flv',
- }]
- elif key.startswith('hls'):
- stream_formats = self._extract_m3u8_formats(
- stream_url, track_id, ext, entry_protocol='m3u8_native',
- m3u8_id=key, fatal=False)
- else:
- continue
-
- if abr:
- for f in stream_formats:
- f['abr'] = abr
+ def invalid_url(url):
+ return not url or url in format_urls or re.search(r'/(?:preview|playlist)/0/30/', url)
- formats.extend(stream_formats)
+ def add_format(f, protocol):
+ mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
+ if mobj:
+ for k, v in mobj.groupdict().items():
+ if not f.get(k):
+ f[k] = v
+ format_id_list = []
+ if protocol:
+ format_id_list.append(protocol)
+ for k in ('ext', 'abr'):
+ v = f.get(k)
+ if v:
+ format_id_list.append(v)
+ abr = f.get('abr')
+ if abr:
+ f['abr'] = int(abr)
+ f.update({
+ 'format_id': '_'.join(format_id_list),
+ 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
+ })
+ formats.append(f)
# New API
transcodings = try_get(
@@ -337,129 +325,165 @@ class SoundcloudIE(InfoExtractor):
if not isinstance(t, dict):
continue
format_url = url_or_none(t.get('url'))
- if not format_url:
+ if not format_url or t.get('snipped') or '/preview/' in format_url:
continue
stream = self._download_json(
- update_url_query(format_url, query), track_id, fatal=False)
+ format_url, track_id, query=query, fatal=False)
if not isinstance(stream, dict):
continue
stream_url = url_or_none(stream.get('url'))
- if not stream_url:
- continue
- if stream_url in format_urls:
+ if invalid_url(stream_url):
continue
format_urls.add(stream_url)
- protocol = try_get(t, lambda x: x['format']['protocol'], compat_str)
+ stream_format = t.get('format') or {}
+ protocol = stream_format.get('protocol')
if protocol != 'hls' and '/hls' in format_url:
protocol = 'hls'
ext = None
preset = str_or_none(t.get('preset'))
if preset:
ext = preset.split('_')[0]
- if ext not in KNOWN_EXTENSIONS:
- mimetype = try_get(
- t, lambda x: x['format']['mime_type'], compat_str)
- ext = mimetype2ext(mimetype) or 'mp3'
- format_id_list = []
- if protocol:
- format_id_list.append(protocol)
- format_id_list.append(ext)
- format_id = '_'.join(format_id_list)
- formats.append({
+ if ext not in KNOWN_EXTENSIONS:
+ ext = mimetype2ext(stream_format.get('mime_type'))
+ add_format({
'url': stream_url,
- 'format_id': format_id,
'ext': ext,
- 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
- })
+ }, 'http' if protocol == 'progressive' else protocol)
+
+ if not formats:
+ # Old API, does not work for some tracks (e.g.
+ # https://soundcloud.com/giovannisarani/mezzo-valzer)
+ # and might serve preview URLs (e.g.
+ # http://www.soundcloud.com/snbrn/ele)
+ format_dict = self._download_json(
+ track_base_url + '/streams', track_id,
+ 'Downloading track url', query=query, fatal=False) or {}
+
+ for key, stream_url in format_dict.items():
+ if invalid_url(stream_url):
+ continue
+ format_urls.add(stream_url)
+ mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key)
+ if mobj:
+ protocol, ext, abr = mobj.groups()
+ add_format({
+ 'abr': abr,
+ 'ext': ext,
+ 'url': stream_url,
+ }, protocol)
if not formats:
# We fallback to the stream_url in the original info, this
# cannot be always used, sometimes it can give an HTTP 404 error
- formats.append({
- 'format_id': 'fallback',
- 'url': update_url_query(info['stream_url'], query),
- 'ext': 'mp3',
- })
- self._check_formats(formats, track_id)
+ urlh = self._request_webpage(
+ HEADRequest(info.get('stream_url') or track_base_url + '/stream'),
+ track_id, query=query, fatal=False)
+ if urlh:
+ stream_url = urlh.geturl()
+ if not invalid_url(stream_url):
+ add_format({'url': stream_url}, 'http')
for f in formats:
f['vcodec'] = 'none'
self._sort_formats(formats)
- result['formats'] = formats
- return result
+ user = info.get('user') or {}
+
+ thumbnails = []
+ artwork_url = info.get('artwork_url')
+ thumbnail = artwork_url or user.get('avatar_url')
+ if isinstance(thumbnail, compat_str):
+ if re.search(self._IMAGE_REPL_RE, thumbnail):
+ for image_id, size in self._ARTWORK_MAP.items():
+ i = {
+ 'id': image_id,
+ 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
+ }
+ if image_id == 'tiny' and not artwork_url:
+ size = 18
+ elif image_id == 'original':
+ i['preference'] = 10
+ if size:
+ i.update({
+ 'width': size,
+ 'height': size,
+ })
+ thumbnails.append(i)
+ else:
+ thumbnails = [{'url': thumbnail}]
+
+ def extract_count(key):
+ return int_or_none(info.get('%s_count' % key))
+
+ return {
+ 'id': track_id,
+ 'uploader': user.get('username'),
+ 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
+ 'uploader_url': user.get('permalink_url'),
+ 'timestamp': unified_timestamp(info.get('created_at')),
+ 'title': title,
+ 'description': info.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(info.get('duration'), 1000),
+ 'webpage_url': info.get('permalink_url'),
+ 'license': info.get('license'),
+ 'view_count': extract_count('playback'),
+ 'like_count': extract_count('favoritings') or extract_count('likes'),
+ 'comment_count': extract_count('comment'),
+ 'repost_count': extract_count('reposts'),
+ 'genre': info.get('genre'),
+ 'formats': formats
+ }
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
+ mobj = re.match(self._VALID_URL, url)
track_id = mobj.group('track_id')
- new_info = {}
- if track_id is not None:
- info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
+ query = {
+ 'client_id': self._CLIENT_ID,
+ }
+ if track_id:
+ info_json_url = self._API_V2_BASE + 'tracks/' + track_id
full_title = track_id
token = mobj.group('secret_token')
if token:
- info_json_url += '&secret_token=' + token
- elif mobj.group('player'):
- query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- real_url = query['url'][0]
- # If the token is in the query of the original url we have to
- # manually add it
- if 'secret_token' in query:
- real_url += '?secret_token=' + query['secret_token'][0]
- return self.url_result(real_url)
+ query['secret_token'] = token
else:
- # extract uploader (which is in the url)
- uploader = mobj.group('uploader')
- # extract simple title (uploader + slug of song title)
- slug_title = mobj.group('title')
+ full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title')
token = mobj.group('token')
- full_title = resolve_title = '%s/%s' % (uploader, slug_title)
if token:
resolve_title += '/%s' % token
+ info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
- webpage = self._download_webpage(url, full_title, fatal=False)
- if webpage:
- entries = self._parse_json(
- self._search_regex(
- r'var\s+c\s*=\s*(\[.+?\])\s*,\s*o\s*=Date\b', webpage,
- 'data', default='[]'), full_title, fatal=False)
- if entries:
- for e in entries:
- if not isinstance(e, dict):
- continue
- if e.get('id') != 67:
- continue
- data = try_get(e, lambda x: x['data'][0], dict)
- if data:
- new_info = data
- break
- info_json_url = self._resolv_url(
- 'https://soundcloud.com/%s' % resolve_title)
-
- # Contains some additional info missing from new_info
+ version = 2
info = self._download_json(
- info_json_url, full_title, 'Downloading info JSON')
+ info_json_url, full_title, 'Downloading info JSON', query=query, fatal=False)
+ if not info:
+ info = self._download_json(
+ info_json_url.replace(self._API_V2_BASE, self._API_BASE),
+ full_title, 'Downloading info JSON', query=query)
+ version = 1
- return self._extract_info_dict(
- merge_dicts(info, new_info), full_title, secret_token=token)
+ return self._extract_info_dict(info, full_title, token, version)
class SoundcloudPlaylistBaseIE(SoundcloudIE):
- @staticmethod
- def _extract_id(e):
- return compat_str(e['id']) if e.get('id') else None
-
- def _extract_track_entries(self, tracks):
- return [
- self.url_result(
- track['permalink_url'], SoundcloudIE.ie_key(),
- video_id=self._extract_id(track))
- for track in tracks if track.get('permalink_url')]
+ def _extract_track_entries(self, tracks, token=None):
+ entries = []
+ for track in tracks:
+ track_id = str_or_none(track.get('id'))
+ url = track.get('permalink_url')
+ if not url:
+ if not track_id:
+ continue
+ url = self._API_V2_BASE + 'tracks/' + track_id
+ if token:
+ url += '?secret_token=' + token
+ entries.append(self.url_result(
+ url, SoundcloudIE.ie_key(), track_id))
+ return entries
class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
@@ -480,41 +504,28 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- # extract uploader (which is in the url)
- uploader = mobj.group('uploader')
- # extract simple title (uploader + slug of song title)
- slug_title = mobj.group('slug_title')
- full_title = '%s/sets/%s' % (uploader, slug_title)
- url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
-
+ full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title')
token = mobj.group('token')
if token:
full_title += '/' + token
- url += '/' + token
- resolv_url = self._resolv_url(url)
- info = self._download_json(resolv_url, full_title)
+ info = self._download_json(self._resolv_url(
+ self._BASE_URL + full_title), full_title)
if 'errors' in info:
msgs = (compat_str(err['error_message']) for err in info['errors'])
raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
- entries = self._extract_track_entries(info['tracks'])
+ entries = self._extract_track_entries(info['tracks'], token)
- return {
- '_type': 'playlist',
- 'entries': entries,
- 'id': '%s' % info['id'],
- 'title': info['title'],
- }
+ return self.playlist_result(
+ entries, str_or_none(info.get('id')), info.get('title'))
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
- _API_V2_BASE = 'https://api-v2.soundcloud.com'
-
def _extract_playlist(self, base_url, playlist_id, playlist_title):
COMMON_QUERY = {
- 'limit': 50,
+ 'limit': 2000000000,
'client_id': self._CLIENT_ID,
'linked_partitioning': '1',
}
@@ -522,12 +533,13 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
query = COMMON_QUERY.copy()
query['offset'] = 0
- next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
+ next_href = base_url
entries = []
for i in itertools.count():
response = self._download_json(
- next_href, playlist_id, 'Downloading track page %s' % (i + 1))
+ next_href, playlist_id,
+ 'Downloading track page %s' % (i + 1), query=query)
collection = response['collection']
@@ -546,9 +558,8 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
continue
return self.url_result(
permalink_url,
- ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
- video_id=self._extract_id(cand),
- video_title=cand.get('title'))
+ SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
+ str_or_none(cand.get('id')), cand.get('title'))
for e in collection:
entry = resolve_entry((e, e.get('track'), e.get('playlist')))
@@ -559,11 +570,10 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
if not next_href:
break
- parsed_next_href = compat_urlparse.urlparse(response['next_href'])
- qs = compat_urlparse.parse_qs(parsed_next_href.query)
- qs.update(COMMON_QUERY)
- next_href = compat_urlparse.urlunparse(
- parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
+ next_href = response['next_href']
+ parsed_next_href = compat_urlparse.urlparse(next_href)
+ query = compat_urlparse.parse_qs(parsed_next_href.query)
+ query.update(COMMON_QUERY)
return {
'_type': 'playlist',
@@ -609,7 +619,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
'url': 'https://soundcloud.com/jcv246/sets',
'info_dict': {
'id': '12982173',
- 'title': 'Jordi / cv (Playlists)',
+ 'title': 'Jordi / cv (Sets)',
},
'playlist_mincount': 2,
}, {
@@ -636,39 +646,29 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
}]
_BASE_URL_MAP = {
- 'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- }
-
- _TITLE_MAP = {
- 'all': 'All',
- 'tracks': 'Tracks',
- 'albums': 'Albums',
- 'sets': 'Playlists',
- 'reposts': 'Reposts',
- 'likes': 'Likes',
- 'spotlight': 'Spotlight',
+ 'all': 'stream/users/%s',
+ 'tracks': 'users/%s/tracks',
+ 'albums': 'users/%s/albums',
+ 'sets': 'users/%s/playlists',
+ 'reposts': 'stream/users/%s/reposts',
+ 'likes': 'users/%s/likes',
+ 'spotlight': 'users/%s/spotlight',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
uploader = mobj.group('user')
- url = 'https://soundcloud.com/%s/' % uploader
- resolv_url = self._resolv_url(url)
user = self._download_json(
- resolv_url, uploader, 'Downloading user info')
+ self._resolv_url(self._BASE_URL + uploader),
+ uploader, 'Downloading user info')
resource = mobj.group('rsrc') or 'all'
return self._extract_playlist(
- self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']),
- '%s (%s)' % (user['username'], self._TITLE_MAP[resource]))
+ self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'],
+ str_or_none(user.get('id')),
+ '%s (%s)' % (user['username'], resource.capitalize()))
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
@@ -678,7 +678,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
'info_dict': {
'id': '286017854',
- 'title': 'Track station: your-text',
+ 'title': 'Track station: your text',
},
'playlist_mincount': 47,
}]
@@ -686,19 +686,17 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url):
track_name = self._match_id(url)
- webpage = self._download_webpage(url, track_name)
-
+ track = self._download_json(self._resolv_url(url), track_name)
track_id = self._search_regex(
- r'soundcloud:track-stations:(\d+)', webpage, 'track id')
+ r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
return self._extract_playlist(
- '%s/stations/soundcloud:track-stations:%s/tracks'
- % (self._API_V2_BASE, track_id),
- track_id, 'Track station: %s' % track_name)
+ self._API_V2_BASE + 'stations/%s/tracks' % track['id'],
+ track_id, 'Track station: %s' % track['title'])
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
- _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
+ _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
IE_NAME = 'soundcloud:playlist'
_TESTS = [{
'url': 'https://api.soundcloud.com/playlists/4110309',
@@ -713,29 +711,22 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
- base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)
- data_dict = {
+ query = {
'client_id': self._CLIENT_ID,
}
token = mobj.group('token')
-
if token:
- data_dict['secret_token'] = token
+ query['secret_token'] = token
- data = compat_urllib_parse_urlencode(data_dict)
data = self._download_json(
- base_url + data, playlist_id, 'Downloading playlist')
+ self._API_V2_BASE + 'playlists/' + playlist_id,
+ playlist_id, 'Downloading playlist', query=query)
- entries = self._extract_track_entries(data['tracks'])
+ entries = self._extract_track_entries(data['tracks'], token)
- return {
- '_type': 'playlist',
- 'id': playlist_id,
- 'title': data.get('title'),
- 'description': data.get('description'),
- 'entries': entries,
- }
+ return self.playlist_result(
+ entries, playlist_id, data.get('title'), data.get('description'))
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
@@ -753,18 +744,18 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
_SEARCH_KEY = 'scsearch'
_MAX_RESULTS_PER_PAGE = 200
_DEFAULT_RESULTS_PER_PAGE = 50
- _API_V2_BASE = 'https://api-v2.soundcloud.com'
def _get_collection(self, endpoint, collection_id, **query):
limit = min(
query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
self._MAX_RESULTS_PER_PAGE)
- query['limit'] = limit
- query['client_id'] = self._CLIENT_ID
- query['linked_partitioning'] = '1'
- query['offset'] = 0
- data = compat_urllib_parse_urlencode(query)
- next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
+ query.update({
+ 'limit': limit,
+ 'client_id': self._CLIENT_ID,
+ 'linked_partitioning': 1,
+ 'offset': 0,
+ })
+ next_url = update_url_query(self._API_V2_BASE + endpoint, query)
collected_results = 0
@@ -791,5 +782,5 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
break
def _get_n_results(self, query, n):
- tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
+ tracks = self._get_collection('search/tracks', query, limit=n, q=query)
return self.playlist_result(tracks, playlist_title=query)