diff options
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-07-24 14:39:21 +0200 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-07-24 14:39:21 +0200 | 
| commit | eb6a41ba0f5dbb836d5b48b9e38f406c3c46c0ec (patch) | |
| tree | b64dcfa23e9b1dce9408c24372b8651a60369ee1 | |
| parent | 7d2392691c18973c86820b796cbfe61cfad9ff11 (diff) | |
ExfmIE: extract Soundcloud songs using SoundcloudIE
Now SoundcloudIE accepts API URLs.
| -rw-r--r-- | youtube_dl/extractor/exfm.py | 44 | ||||
| -rw-r--r-- | youtube_dl/extractor/soundcloud.py | 39 | 
2 files changed, 54 insertions, 29 deletions
| diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py index fe1582d1a..3443f19c5 100644 --- a/youtube_dl/extractor/exfm.py +++ b/youtube_dl/extractor/exfm.py @@ -8,17 +8,30 @@ class ExfmIE(InfoExtractor):      IE_NAME = u'exfm'      IE_DESC = u'ex.fm'      _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)' -    _SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream' -    _TEST = { -        u'url': u'http://ex.fm/song/1bgtzg', -        u'file': u'1bgtzg.mp3', -        u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf', -        u'info_dict': { -            u"title": u"We Can't Stop", -            u"uploader": u"Miley Cyrus", -            u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37' -        } -    } +    _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream' +    _TESTS = [ +        { +            u'url': u'http://ex.fm/song/1bgtzg', +            u'file': u'95223130.mp3', +            u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf', +            u'info_dict': { +                u"title": u"We Can't Stop - Miley Cyrus", +                u"uploader": u"Miley Cyrus", +                u'upload_date': u'20130603', +                u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC', +            }, +            u'note': u'Soundcloud song', +        }, +        { +            u'url': u'http://ex.fm/song/wddt8', +            u'file': u'wddt8.mp3', +            u'md5': u'966bd70741ac5b8570d8e45bfaed3643', +            u'info_dict': { +                u'title': u'Safe and Sound', +                u'uploader': u'Capital Cities', +            }, +        }, +    ]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) @@ -26,11 +39,10 @@ class ExfmIE(InfoExtractor):          info_url = "http://ex.fm/api/v3/song/%s" %(song_id)          
webpage = self._download_webpage(info_url, song_id)          info = json.loads(webpage) -        song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url']) -        if song_url is not None: -        	song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28" -        else: -        	song_url = info['song']['url'] +        song_url = info['song']['url'] +        if re.match(self._SOUNDCLOUD_URL, song_url) is not None: +            self.to_screen('Soundcloud song detected') +            return self.url_result(song_url.replace('/stream',''), 'Soundcloud')          return [{              'id':          song_id,              'url':         song_url, diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 54ff8db12..7c9f1c6b6 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -19,7 +19,11 @@ class SoundcloudIE(InfoExtractor):         of the stream token and uid       """ -    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$' +    _VALID_URL = r'''^(?:https?://)? 
+                    (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$) +                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) +                    ) +                    '''      IE_NAME = u'soundcloud'      _TEST = {          u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', @@ -35,6 +39,10 @@ class SoundcloudIE(InfoExtractor):      _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' +    @classmethod +    def suitable(cls, url): +        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None +      def report_resolve(self, video_id):          """Report information extraction."""          self.to_screen(u'%s: Resolving id' % video_id) @@ -63,21 +71,26 @@ class SoundcloudIE(InfoExtractor):          }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) +        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)          if mobj is None:              raise ExtractorError(u'Invalid URL: %s' % url) -        # extract uploader (which is in the url) -        uploader = mobj.group(1) -        # extract simple title (uploader + slug of song title) -        slug_title =  mobj.group(2) -        full_title = '%s/%s' % (uploader, slug_title) - -        self.report_resolve(full_title) - -        url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title) -        resolv_url = self._resolv_url(url) -        info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON') +        track_id = mobj.group('track_id') +        if track_id is not None: +            info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID +            full_title = track_id +        else: +            # extract uploader (which is in the url) +            uploader = mobj.group(1) +            # extract simple title (uploader + slug of song title) +            slug_title =  mobj.group(2) +            full_title = '%s/%s' % (uploader, 
slug_title) +     +            self.report_resolve(full_title) +     +            url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title) +            info_json_url = self._resolv_url(url) +        info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')          info = json.loads(info_json)          return self._extract_info_dict(info, full_title) | 
