aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
authorYour Name <you@example.com>2015-01-02 02:20:04 -0600
committerYour Name <you@example.com>2015-01-02 02:20:04 -0600
commitdefaf19f5de839777cda1654d42b54850a78d8a7 (patch)
treeba24a2deac61cc6edb7b2812ba2eaa69db51aff6 /youtube_dl/extractor
parenta1e9e6440f60f82b8d8552053219e1e8e3149c5a (diff)
Push api updates to simplify audiomack, add support for albums
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/__init__.py2
-rw-r--r--youtube_dl/extractor/audiomack.py110
2 files changed, 84 insertions, 28 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 6c5827f88..e4c51f238 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -26,7 +26,7 @@ from .arte import (
ArteTVEmbedIE,
)
from .atresplayer import AtresPlayerIE
-from .audiomack import AudiomackIE
+from .audiomack import AudiomackIE, AudiomackAlbumIE
from .auengine import AUEngineIE
from .azubu import AzubuIE
from .bambuser import BambuserIE, BambuserChannelIE
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index 622b20989..b6f3039c7 100644
--- a/youtube_dl/extractor/audiomack.py
+++ b/youtube_dl/extractor/audiomack.py
@@ -9,20 +9,21 @@ import time
class AudiomackIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
+ _VALID_URL = r'https?://(?:www\.)?audiomack\.com/(song)/(?P<id>[\w/-]+)'
IE_NAME = 'audiomack'
_TESTS = [
- # hosted on audiomack
+ # audiomack
{
'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
'info_dict':
{
- 'id': 'roosh-williams/extraordinary',
- 'ext': 'mp3',
- 'title': 'Roosh Williams - Extraordinary'
+ 'id': '310086',
+ "ext": "mp3",
+ "artist": "Roosh Williams",
+ 'title': 'Extraordinary'
}
},
- # hosted on soundcloud via audiomack
+ # audiomack through soundcloud
{
'add_ie': ['Soundcloud'],
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
@@ -37,33 +38,88 @@ class AudiomackIE(InfoExtractor):
},
]
+ @staticmethod
+ def create_song_dictionary(api_response, album_url_tag, track_no=0):
+ # All keys are the same in audiomack api and InfoExtractor format
+ entry = {key: api_response[key] for key in ["title", "artist", "id", "url"] if key in api_response}
+ # Fudge values in the face of missing metadata
+ if "id" not in entry:
+ entry["id"] = track_no
+ if "title" not in entry:
+ entry["title"] = album_url_tag
+ return entry
+
def _real_extract(self, url):
- video_id = self._match_id(url)
+ # URLs end with [uploader name]/[uploader title].
+ # This title is whatever the user types in, and is rarely
+ # the proper song title. Real metadata is in the api response.
+ album_url_tag = self._match_id(url)
+ # Request the extended version of the api for extra fields like artist and title
api_response = self._download_json(
- "http://www.audiomack.com/api/music/url/song/%s?_=%d" % (
- video_id, time.time()),
- video_id)
+ "http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d" % (
+ album_url_tag, time.time()),
+ album_url_tag)
- if "url" not in api_response:
- raise ExtractorError("Unable to deduce api url of song")
- realurl = api_response["url"]
+ # The API reports errors inconsistently: "url" may be missing or empty, or an "error" key may be present
+ if "url" not in api_response or not api_response["url"] or "error" in api_response:
+ raise ExtractorError("Invalid url %s", url)
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
- # - if so, pass the work off to the soundcloud extractor
- if SoundcloudIE.suitable(realurl):
- return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
+ # if so, pass the work off to the soundcloud extractor
+ if SoundcloudIE.suitable(api_response["url"]):
+ return {'_type': 'url', 'url': api_response["url"], 'ie_key': 'Soundcloud'}
- webpage = self._download_webpage(url, video_id)
- artist = self._html_search_regex(
- r'<span class="artist">(.*?)</span>', webpage, "artist")
- songtitle = self._html_search_regex(
- r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>',
- webpage, "title")
- title = artist + " - " + songtitle
+ return self.create_song_dictionary(api_response, album_url_tag)
- return {
- 'id': video_id,
- 'title': title,
- 'url': realurl,
+
+class AudiomackAlbumIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
+ IE_NAME = 'audiomack:album'
+ _TESTS = [
+ # Standard album playlist
+ {
+ 'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
+ "playlist_count": 15,
+ 'info_dict':
+ {
+ 'id': "812251",
+ 'title': "Tha Tour: Part 2 (Official Mixtape)"
+ }
+ },
+ # Album playlist ripped from fakeshoredrive with no metadata
+ {
+ "url": "http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project",
+ "playlist_count": 10
}
+ ]
+
+ def _real_extract(self, url):
+ # URLs end with [uploader name]/[uploader title].
+ # This title is whatever the user types in, and is rarely
+ # the proper album title. Real metadata is in the api response.
+ album_url_tag = self._match_id(url)
+ result = {"_type": "playlist", "entries": []}
+ # There is no single endpoint for album metadata - instead it is repeated in each song's metadata.
+ # Therefore we don't know how many songs the album has and must keep requesting tracks until one comes back with an empty url
+ track_no = 0
+ while True:
+ # Get song's metadata
+ api_response = self._download_json("http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d"
+ % (album_url_tag, track_no, time.time()), album_url_tag)
+
+ # Total failure, only occurs when url is totally wrong
+ # Won't happen in middle of valid playlist (next case)
+ if "url" not in api_response or "error" in api_response:
+ raise ExtractorError("Invalid url for track %d of album url %s" % (track_no, url))
+ # URL is good but song id doesn't exist - usually means end of playlist
+ elif not api_response["url"]:
+ break
+ else:
+ # Pull out the album metadata and add to result (if it exists)
+ for resultkey, apikey in [("id", "album_id"), ("title", "album_title")]:
+ if apikey in api_response and resultkey not in result:
+ result[resultkey] = api_response[apikey]
+ result["entries"].append(AudiomackIE.create_song_dictionary(api_response, album_url_tag, track_no))
+ track_no += 1
+ return result