diff options
Diffstat (limited to 'youtube_dl/extractor/bandcamp.py')
| -rw-r--r-- | youtube_dl/extractor/bandcamp.py | 71 | 
1 files changed, 69 insertions, 2 deletions
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 129a20f44..359d4174b 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -3,13 +3,16 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    compat_str,
+    compat_urlparse,
     ExtractorError,
 )
 
 
 class BandcampIE(InfoExtractor):
+    IE_NAME = u'Bandcamp'
     _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
-    _TEST = {
+    _TESTS = [{
         u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
         u'file': u'1812978515.mp3',
         u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
@@ -17,7 +20,29 @@ class BandcampIE(InfoExtractor):
             u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
         },
         u'skip': u'There is a limit of 200 free downloads / month for the test song'
-    }
+    }, {
+        u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
+        u'playlist': [
+            {
+                u'file': u'1353101989.mp3',
+                u'md5': u'39bc1eded3476e927c724321ddf116cf',
+                u'info_dict': {
+                    u'title': u'Intro',
+                }
+            },
+            {
+                u'file': u'38097443.mp3',
+                u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
+                u'info_dict': {
+                    u'title': u'Kero One - Keep It Alive (Blazo remix)',
+                }
+            },
+        ],
+        u'params': {
+            u'playlistend': 2
+        },
+        u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -26,6 +51,26 @@ class BandcampIE(InfoExtractor):
         # We get the link to the free download page
         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
         if m_download is None:
+            m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
+            if m_trackinfo:
+                json_code = m_trackinfo.group(1)
+                data = json.loads(json_code)
+
+                entries = []
+                for d in data:
+                    formats = [{
+                        'format_id': format_id,
+                        'url': format_url,
+                        'ext': format_id.partition('-')[0]
+                    } for format_id, format_url in sorted(d['file'].items())]
+                    entries.append({
+                        'id': compat_str(d['id']),
+                        'title': d['title'],
+                        'formats': formats,
+                    })
+
+                return self.playlist_result(entries, title, title)
+            # Neither a free download page nor inline trackinfo was found
             raise ExtractorError(u'No free songs found')
 
         download_link = m_download.group(1)
@@ -61,3 +106,25 @@ class BandcampIE(InfoExtractor):
                       }
 
         return [track_info]
+
+
+class BandcampAlbumIE(InfoExtractor):
+    IE_NAME = u'Bandcamp:album'
+    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
+        if not tracks_paths:
+            raise ExtractorError(u'The page doesn\'t contain any track')
+        entries = [
+            self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
+            for t_path in tracks_paths]
+        title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title')
+        return {
+            '_type': 'playlist',
+            'title': title,
+            'entries': entries,
+        }
