diff options
Diffstat (limited to 'youtube_dl/extractor/audiomack.py')
| -rw-r--r-- | youtube_dl/extractor/audiomack.py | 142 | 
1 files changed, 106 insertions, 36 deletions
| diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index 6232d2cd0..8bfe50214 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -1,69 +1,139 @@  # coding: utf-8  from __future__ import unicode_literals +import itertools +import time +  from .common import InfoExtractor  from .soundcloud import SoundcloudIE -from ..utils import ExtractorError - -import time +from ..utils import ( +    ExtractorError, +    url_basename, +)  class AudiomackIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'      IE_NAME = 'audiomack'      _TESTS = [ -        #hosted on audiomack +        # hosted on audiomack          {              'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',              'info_dict':              { -                'id' : 'roosh-williams/extraordinary', +                'id': '310086',                  'ext': 'mp3', -                'title': 'Roosh Williams - Extraordinary' +                'uploader': 'Roosh Williams', +                'title': 'Extraordinary'              }          }, -        #hosted on soundcloud via audiomack +        # audiomack wrapper around soundcloud song          { +            'add_ie': ['Soundcloud'],              'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', -            'file': '172419696.mp3', -            'info_dict': -            { +            'info_dict': { +                'id': '172419696',                  'ext': 'mp3', +                'description': 'md5:1fc3272ed7a635cce5be1568c2822997',                  'title': 'Young Thug ft Lil Wayne - Take Kare', -                "upload_date": "20141016", -                "description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n", -                "uploader": "Young Thug World" +                'uploader': 'Young Thug World', +                'upload_date': '20141016',              } -        } +        },      ]      def _real_extract(self, url): -        video_id = self._match_id(url) +        # URLs end with [uploader name]/[uploader title] +        # this title is whatever the user types in, and is rarely +        # the proper song title.  Real metadata is in the api response +        album_url_tag = self._match_id(url) +        # Request the extended version of the api for extra fields like artist and title          api_response = self._download_json( -            "http://www.audiomack.com/api/music/url/song/%s?_=%d" % ( -                video_id, time.time()), -            video_id) - -        if "url" not in api_response: -            raise ExtractorError("Unable to deduce api url of song") -        realurl = api_response["url"] +            'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % ( +                album_url_tag, time.time()), +            album_url_tag) -        #Audiomack wraps a lot of soundcloud tracks in their branded wrapper -        # - if so, pass the work off to the soundcloud extractor -        if SoundcloudIE.suitable(realurl): -            return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'} +        # API is inconsistent with errors +        if 'url' not in api_response or not api_response['url'] or 'error' in api_response: +            raise ExtractorError('Invalid url %s', url) -        webpage = self._download_webpage(url, video_id) -        artist = self._html_search_regex( -            r'<span class="artist">(.*?)</span>', webpage, "artist") -        songtitle = self._html_search_regex( -            r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>', -            webpage, "title") -        title = artist + " - " + songtitle +        # Audiomack wraps a lot of soundcloud tracks in their branded wrapper +        # if so, pass the work off to the soundcloud extractor +        if SoundcloudIE.suitable(api_response['url']): +            return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}          return { -            'id': video_id, -            'title': title, -            'url': realurl, +            'id': api_response.get('id', album_url_tag), +            'uploader': api_response.get('artist'), +            'title': api_response.get('title'), +            'url': api_response['url'], +        } + + +class AudiomackAlbumIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)' +    IE_NAME = 'audiomack:album' +    _TESTS = [ +        # Standard album playlist +        { +            'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape', +            'playlist_count': 15, +            'info_dict': +            { +                'id': '812251', +                'title': 'Tha Tour: Part 2 (Official Mixtape)' +            } +        }, +        # Album playlist ripped from fakeshoredrive with no metadata +        { +            'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project', +            'playlist': [{ +                'info_dict': { +                    'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu', +                    'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu', +                    'ext': 'mp3', +                } +            }], +            'params': { +                'playliststart': 8, +                'playlistend': 8, +            }          } +    ] + +    def _real_extract(self, url): +        # URLs end with [uploader name]/[uploader title] +        # this title is whatever the user types in, and is rarely +        # the proper song title.  Real metadata is in the api response +        album_url_tag = self._match_id(url) +        result = {'_type': 'playlist', 'entries': []} +        # There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata +        # Therefore we don't know how many songs the album has and must infi-loop until failure +        for track_no in itertools.count(): +            # Get song's metadata +            api_response = self._download_json( +                'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d' +                % (album_url_tag, track_no, time.time()), album_url_tag, +                note='Querying song information (%d)' % (track_no + 1)) + +            # Total failure, only occurs when url is totally wrong +            # Won't happen in middle of valid playlist (next case) +            if 'url' not in api_response or 'error' in api_response: +                raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url)) +            # URL is good but song id doesn't exist - usually means end of playlist +            elif not api_response['url']: +                break +            else: +                # Pull out the album metadata and add to result (if it exists) +                for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]: +                    if apikey in api_response and resultkey not in result: +                        result[resultkey] = api_response[apikey] +                song_id = url_basename(api_response['url']).rpartition('.')[0] +                result['entries'].append({ +                    'id': api_response.get('id', song_id), +                    'uploader': api_response.get('artist'), +                    'title': api_response.get('title', song_id), +                    'url': api_response['url'], +                }) +        return result | 
