about summary refs log tree commit diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2015-01-10 02:03:46 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2015-01-10 02:03:46 +0100
commit 51897bb77c504ad206abbef5ae7504fcd082b5b0 (patch)
tree b03d759663326139290e701ceea0bc6734928723 /youtube_dl/extractor
parent bd1a281ede1548ae0f0c74360eb518751c2a4526 (diff)
parent ff0813313ac33b6abd03e9322b706dc83c5aeb14 (diff)
Merge remote-tracking branch 'xavierbeynon/master'
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/__init__.py 2
-rw-r--r-- youtube_dl/extractor/audiomack.py 104
2 files changed, 81 insertions, 25 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 5da7568ca..f544e87f1 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -26,7 +26,7 @@ from .arte import (
ArteTVEmbedIE,
)
from .atresplayer import AtresPlayerIE
-from .audiomack import AudiomackIE
+from .audiomack import AudiomackIE, AudiomackAlbumIE
from .auengine import AUEngineIE
from .azubu import AzubuIE
from .bambuser import BambuserIE, BambuserChannelIE
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index 622b20989..73cde78d3 100644
--- a/youtube_dl/extractor/audiomack.py
+++ b/youtube_dl/extractor/audiomack.py
@@ -17,12 +17,13 @@ class AudiomackIE(InfoExtractor):
'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
'info_dict':
{
- 'id': 'roosh-williams/extraordinary',
+ 'id': '310086',
'ext': 'mp3',
- 'title': 'Roosh Williams - Extraordinary'
+ 'artist': 'Roosh Williams',
+ 'title': 'Extraordinary'
}
},
- # hosted on soundcloud via audiomack
+ # audiomack wrapper around soundcloud song
{
'add_ie': ['Soundcloud'],
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
@@ -37,33 +38,88 @@ class AudiomackIE(InfoExtractor):
},
]
+ @staticmethod
+ def create_song_dictionary(api_response, album_url_tag, track_no=0):
+ # All keys are the same in audiomack api and InfoExtractor format
+ entry = {key: api_response[key] for key in ['title', 'artist', 'id', 'url'] if key in api_response}
+ # Fudge values in the face of missing metadata
+ if 'id' not in entry:
+ entry['id'] = track_no
+ if 'title' not in entry:
+ entry['title'] = album_url_tag
+ return entry
+
def _real_extract(self, url):
- video_id = self._match_id(url)
+ # URLs end with [uploader name]/[uploader title]
+ # this title is whatever the user types in, and is rarely
+ # the proper song title. Real metadata is in the api response
+ album_url_tag = self._match_id(url)
+ # Request the extended version of the api for extra fields like artist and title
api_response = self._download_json(
- "http://www.audiomack.com/api/music/url/song/%s?_=%d" % (
- video_id, time.time()),
- video_id)
+ 'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % (
+ album_url_tag, time.time()),
+ album_url_tag)
- if "url" not in api_response:
- raise ExtractorError("Unable to deduce api url of song")
- realurl = api_response["url"]
+ # API is inconsistent with errors
+ if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
+ raise ExtractorError('Invalid url %s', url)
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
- # - if so, pass the work off to the soundcloud extractor
- if SoundcloudIE.suitable(realurl):
- return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
+ # if so, pass the work off to the soundcloud extractor
+ if SoundcloudIE.suitable(api_response['url']):
+ return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
+
+ return self.create_song_dictionary(api_response, album_url_tag)
- webpage = self._download_webpage(url, video_id)
- artist = self._html_search_regex(
- r'<span class="artist">(.*?)</span>', webpage, "artist")
- songtitle = self._html_search_regex(
- r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>',
- webpage, "title")
- title = artist + " - " + songtitle
- return {
- 'id': video_id,
- 'title': title,
- 'url': realurl,
+class AudiomackAlbumIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
+ IE_NAME = 'audiomack:album'
+ _TESTS = [
+ # Standard album playlist
+ {
+ 'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
+ 'playlist_count': 15,
+ 'info_dict':
+ {
+ 'id': '812251',
+ 'title': 'Tha Tour: Part 2 (Official Mixtape)'
+ }
+ },
+ # Album playlist ripped from fakeshoredrive with no metadata
+ {
+ 'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
+ 'playlist_count': 10
}
+ ]
+
+ def _real_extract(self, url):
+ # URLs end with [uploader name]/[uploader title]
+ # this title is whatever the user types in, and is rarely
+ # the proper song title. Real metadata is in the api response
+ album_url_tag = self._match_id(url)
+ result = {'_type': 'playlist', 'entries': []}
+ # There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
+ # Therefore we don't know how many songs the album has and must infi-loop until failure
+ track_no = 0
+ while True:
+ # Get song's metadata
+ api_response = self._download_json('http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
+ % (album_url_tag, track_no, time.time()), album_url_tag)
+
+ # Total failure, only occurs when url is totally wrong
+ # Won't happen in middle of valid playlist (next case)
+ if 'url' not in api_response or 'error' in api_response:
+ raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
+ # URL is good but song id doesn't exist - usually means end of playlist
+ elif not api_response['url']:
+ break
+ else:
+ # Pull out the album metadata and add to result (if it exists)
+ for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
+ if apikey in api_response and resultkey not in result:
+ result[resultkey] = api_response[apikey]
+ result['entries'].append(AudiomackIE.create_song_dictionary(api_response, album_url_tag, track_no))
+ track_no += 1
+ return result