diff options
| author | xavier <xavier.beynon@gmail.com> | 2014-10-23 23:54:59 -0500 | 
|---|---|---|
| committer | xavier <xavier.beynon@gmail.com> | 2014-10-23 23:54:59 -0500 | 
| commit | 9e9bc793f3abddc4824cfcb13f569163fb0a4ba7 (patch) | |
| tree | 9188d0a967ee65e8c30df94d58e98882d61f2bae | |
| parent | 5c565ac9e7fed4f6a7123332cbaf35fc44a5a921 (diff) | |
Finished audiomack extractor
| -rw-r--r-- | youtube_dl/extractor/audiomack.py | 82 | 
1 file changed, 53 insertions, 29 deletions
| diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index c5214f401..2ececa998 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -1,43 +1,67 @@ +# Xavier Beynon 2014  # coding: utf-8  from __future__ import unicode_literals  from .common import InfoExtractor +from .soundcloud import SoundcloudIE  import datetime  import time -import urllib.request -import json  class AudiomackIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)' -    _TEST = { -        'url': 'https://www.audiomack.com/song/crewneckkramer/story-i-tell', -        'info_dict': { -            'id': 'story-i-tell', -            'ext': 'mp3', -            'title': 'story-i-tell' +    IE_NAME = 'audiomack' +    _TESTS = [ +        #hosted on audiomack +        { +            'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary', +            'file': 'Roosh Williams - Extraordinary.mp3', +            'info_dict': +            { +                'ext': 'mp3', +                'title': 'Roosh Williams - Extraordinary' +            } +        }, +        #hosted on soundcloud via audiomack +        { +            'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', +            'file': '172419696.mp3', +            'info_dict': +            { +                'ext': 'mp3', +                'title': 'Young Thug ft Lil Wayne - Take Kare', +                "upload_date": "20141016", +                "description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n", +                "uploader": "Young Thug World" +            }          } -    } +    ]      def _real_extract(self, url): -        # TODO more code goes here, for example ... 
-        #webpage = self._download_webpage(url, video_id) -        #title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title') -	 -        assert("/song/" in url) -        songurl = url[url.index("/song/")+5:] -        title = songurl[songurl.rindex("/")+1:] -        video_id = title -        t = int(time.mktime(datetime.datetime.now().timetuple())) -        s = "http://www.audiomack.com/api/music/url/song"+songurl+"?_="+str(t) -        f = urllib.request.urlopen(s) -        j = f.read(1000).decode("utf-8") -        data = json.loads(j) +        #id is what follows /song/ in url, usually the uploader name + title +        id = url[url.index("/song/")+5:] -        return { -            'id': video_id, -            'title': title, -            'url' : data["url"], -            'ext' : 'mp3' -            # TODO more properties (see youtube_dl/extractor/common.py) -        }    +        #Call the api, which gives us a json doc with the real url inside +        rightnow = int(time.mktime(datetime.datetime.now().timetuple())) +        apiresponse = self._download_json("http://www.audiomack.com/api/music/url/song"+id+"?_="+str(rightnow), id) +        if not url in apiresponse: +            raise Exception("Unable to deduce api url of song") +        realurl = apiresponse["url"] + +        #Audiomack wraps a lot of soundcloud tracks in their branded wrapper +        # - if so, pass the work off to the soundcloud extractor +        if SoundcloudIE.suitable(realurl): +            sc = SoundcloudIE(downloader=self._downloader) +            return sc._real_extract(realurl) +        else: +            #Pull out metadata +            page = self._download_webpage(url, id) +            artist = self._html_search_regex(r'<span class="artist">(.*)</span>', page, "artist") +            songtitle = self._html_search_regex(r'<h1 class="profile-title song-title"><span class="artist">.*</span>(.*)</h1>', page, "title") +            title = artist+" - "+songtitle +            
return { +                'id': title,  # ignore id, which is not useful in song name +                'title': title, +                'url': realurl, +                'ext': 'mp3' +            } | 
