about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/soundcloud.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/soundcloud.py')
-rw-r--r-- youtube_dl/extractor/soundcloud.py | 70
1 files changed, 49 insertions, 21 deletions
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 194dabc71..3b7ecb3c3 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
_VALID_URL = r'''(?x)^(?:https?://)?
(?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/
- (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
+ (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
@@ -53,6 +53,7 @@ class SoundcloudIE(InfoExtractor):
'uploader': 'E.T. ExTerrestrial Music',
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'duration': 143,
+ 'license': 'all-rights-reserved',
}
},
# not streamable song
@@ -66,6 +67,7 @@ class SoundcloudIE(InfoExtractor):
'uploader': 'The Royal Concept',
'upload_date': '20120521',
'duration': 227,
+ 'license': 'all-rights-reserved',
},
'params': {
# rtmp
@@ -84,6 +86,7 @@ class SoundcloudIE(InfoExtractor):
'description': 'test chars: \"\'/\\ä↭',
'upload_date': '20131209',
'duration': 9,
+ 'license': 'all-rights-reserved',
},
},
# private link (alt format)
@@ -98,6 +101,7 @@ class SoundcloudIE(InfoExtractor):
'description': 'test chars: \"\'/\\ä↭',
'upload_date': '20131209',
'duration': 9,
+ 'license': 'all-rights-reserved',
},
},
# downloadable song
@@ -112,6 +116,7 @@ class SoundcloudIE(InfoExtractor):
'uploader': 'oddsamples',
'upload_date': '20140109',
'duration': 17,
+ 'license': 'cc-by-sa',
},
},
]
@@ -119,6 +124,12 @@ class SoundcloudIE(InfoExtractor):
_CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
+ @staticmethod
+ def _extract_urls(webpage):
+ return [m.group('url') for m in re.finditer(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
+ webpage)]
+
def report_resolve(self, video_id):
"""Report information extraction."""
self.to_screen('%s: Resolving id' % video_id)
@@ -132,20 +143,20 @@ class SoundcloudIE(InfoExtractor):
name = full_title or track_id
if quiet:
self.report_extraction(name)
-
- thumbnail = info['artwork_url']
- if thumbnail is not None:
+ thumbnail = info.get('artwork_url')
+ if isinstance(thumbnail, compat_str):
thumbnail = thumbnail.replace('-large', '-t500x500')
ext = 'mp3'
result = {
'id': track_id,
- 'uploader': info['user']['username'],
- 'upload_date': unified_strdate(info['created_at']),
+ 'uploader': info.get('user', {}).get('username'),
+ 'upload_date': unified_strdate(info.get('created_at')),
'title': info['title'],
- 'description': info['description'],
+ 'description': info.get('description'),
'thumbnail': thumbnail,
'duration': int_or_none(info.get('duration'), 1000),
'webpage_url': info.get('permalink_url'),
+ 'license': info.get('license'),
}
formats = []
if info.get('downloadable', False):
@@ -215,7 +226,7 @@ class SoundcloudIE(InfoExtractor):
raise ExtractorError('Invalid URL: %s' % url)
track_id = mobj.group('track_id')
- token = None
+
if track_id is not None:
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
full_title = track_id
@@ -249,7 +260,20 @@ class SoundcloudIE(InfoExtractor):
return self._extract_info_dict(info, full_title, secret_token=token)
-class SoundcloudSetIE(SoundcloudIE):
+class SoundcloudPlaylistBaseIE(SoundcloudIE):
+ @staticmethod
+ def _extract_id(e):
+ return compat_str(e['id']) if e.get('id') else None
+
+ def _extract_track_entries(self, tracks):
+ return [
+ self.url_result(
+ track['permalink_url'], SoundcloudIE.ie_key(),
+ video_id=self._extract_id(track))
+ for track in tracks if track.get('permalink_url')]
+
+
+class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
_VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
IE_NAME = 'soundcloud:set'
_TESTS = [{
@@ -259,6 +283,9 @@ class SoundcloudSetIE(SoundcloudIE):
'title': 'The Royal Concept EP',
},
'playlist_mincount': 6,
+ }, {
+ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -285,7 +312,7 @@ class SoundcloudSetIE(SoundcloudIE):
msgs = (compat_str(err['error_message']) for err in info['errors'])
raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
- entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in info['tracks']]
+ entries = self._extract_track_entries(info['tracks'])
return {
'_type': 'playlist',
@@ -295,7 +322,7 @@ class SoundcloudSetIE(SoundcloudIE):
}
-class SoundcloudUserIE(SoundcloudIE):
+class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:(?:www|m)\.)?soundcloud\.com/
@@ -312,21 +339,21 @@ class SoundcloudUserIE(SoundcloudIE):
'id': '114582580',
'title': 'The Akashic Chronicler (All)',
},
- 'playlist_mincount': 111,
+ 'playlist_mincount': 74,
}, {
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
'info_dict': {
'id': '114582580',
'title': 'The Akashic Chronicler (Tracks)',
},
- 'playlist_mincount': 50,
+ 'playlist_mincount': 37,
}, {
'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
'info_dict': {
'id': '114582580',
'title': 'The Akashic Chronicler (Playlists)',
},
- 'playlist_mincount': 3,
+ 'playlist_mincount': 2,
}, {
'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
'info_dict': {
@@ -345,7 +372,7 @@ class SoundcloudUserIE(SoundcloudIE):
'url': 'https://soundcloud.com/grynpyret/spotlight',
'info_dict': {
'id': '7098329',
- 'title': 'Grynpyret (Spotlight)',
+ 'title': 'GRYNPYRET (Spotlight)',
},
'playlist_mincount': 1,
}]
@@ -407,13 +434,14 @@ class SoundcloudUserIE(SoundcloudIE):
for cand in candidates:
if isinstance(cand, dict):
permalink_url = cand.get('permalink_url')
+ entry_id = self._extract_id(cand)
if permalink_url and permalink_url.startswith('http'):
- return permalink_url
+ return permalink_url, entry_id
for e in collection:
- permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
+ permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
if permalink_url:
- entries.append(self.url_result(permalink_url))
+ entries.append(self.url_result(permalink_url, video_id=entry_id))
next_href = response.get('next_href')
if not next_href:
@@ -433,7 +461,7 @@ class SoundcloudUserIE(SoundcloudIE):
}
-class SoundcloudPlaylistIE(SoundcloudIE):
+class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
IE_NAME = 'soundcloud:playlist'
_TESTS = [{
@@ -463,7 +491,7 @@ class SoundcloudPlaylistIE(SoundcloudIE):
data = self._download_json(
base_url + data, playlist_id, 'Downloading playlist')
- entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in data['tracks']]
+ entries = self._extract_track_entries(data['tracks'])
return {
'_type': 'playlist',