diff options
author | Devin J. Pohly <djpohly@gmail.com> | 2015-03-12 15:43:13 -0400 |
---|---|---|
committer | Devin J. Pohly <djpohly@gmail.com> | 2015-03-12 16:03:37 -0400 |
commit | 11984c7467184100ca4a61ae939a8c260480f42c (patch) | |
tree | 66d122eaf41e9c203dbc6e13e170a4107c74a9a7 | |
parent | 3946864c8a752c068b188126013ddb747b53385d (diff) |
[BeatportPro] Add new extractor
This extractor is for Beatport's 2-minute, low-quality track previews
only. To obtain an entire track, you obviously have to purchase and
download it normally through the Beatport store!
Possible future improvements:
- Playlists for albums or other track-list pages
- User login to play from My Beatport, Hold Bin, or Cart
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/beatportpro.py | 101 |
2 files changed, 102 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7f9523c2b..ac765fdb8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -37,6 +37,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE from .bbccouk import BBCCoUkIE from .beeg import BeegIE from .behindkink import BehindKinkIE +from .beatportpro import BeatportProIE from .bet import BetIE from .bild import BildIE from .bilibili import BiliBiliIE diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py new file mode 100644 index 000000000..21048b732 --- /dev/null +++ b/youtube_dl/extractor/beatportpro.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +import re +import json + + +class BeatportProIE(InfoExtractor): + _VALID_URL = r'https?://pro\.beatport\.com/track/.*/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371', + 'md5': 'b3c34d8639a2f6a7f734382358478887', + 'info_dict': { + 'id': 5379371, + 'display-id': 'synesthesia-original-mix', + 'ext': 'mp4', + 'title': 'Froxic - Synesthesia (Original Mix)', + }, + }, { + 'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896', + 'md5': 'e44c3025dfa38c6577fbaeb43da43514', + 'info_dict': { + 'id': 3756896, + 'display-id': 'love-and-war-original-mix', + 'ext': 'mp3', + 'title': 'Wolfgang Gartner - Love & War (Original Mix)', + }, + }, { + 'url': 'https://pro.beatport.com/track/birds-original-mix/4991738', + 'md5': 'a1fd8e8046de3950fd039304c186c05f', + 'info_dict': { + 'id': 4991738, + 'display-id': 'birds-original-mix', + 'ext': 'mp4', + 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)", + } + }] + + def _real_extract(self, url): + track_id = self._match_id(url) + webpage = self._download_webpage(url, track_id) + + # Extract "Playables" JSON information from the page + playables = self._search_regex(r'window\.Playables = ({.*?});', webpage, + 'playables info', flags=re.DOTALL) + playables = json.loads(playables) + + # Find first track with matching ID (always the first one listed?) + track = next(filter(lambda t: t['id'] == int(track_id), playables['tracks'])) + + # Construct title from artist(s), track name, and mix name + title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name'] + if track['mix']: + title += ' (' + track['mix'] + ')' + + # Get format information + formats = [] + for ext, info in track['preview'].items(): + if info['url'] is None: + continue + fmt = { + 'url': info['url'], + 'ext': ext, + 'format_id': ext, + 'vcodec': 'none', + } + if ext == 'mp3': + fmt['preference'] = 0 + fmt['acodec'] = 'mp3' + fmt['abr'] = 96 + fmt['asr'] = 44100 + elif ext == 'mp4': + fmt['preference'] = 1 + fmt['acodec'] = 'aac' + fmt['abr'] = 96 + fmt['asr'] = 44100 + formats += [fmt] + formats.sort(key=lambda f: f['preference']) + + # Get album art as thumbnails + imgs = [] + for name, info in track['images'].items(): + if name == 'dynamic' or info['url'] is None: + continue + img = { + 'id': name, + 'url': info['url'], + 'height': info['height'], + 'width': info['width'], + } + imgs += [img] + + return { + 'id': track['id'], + 'display-id': track['slug'], + 'title': title, + 'formats': formats, + 'thumbnails': imgs, + } |