diff options
Diffstat (limited to 'youtube_dl/extractor/deezer.py')
| -rw-r--r-- | youtube_dl/extractor/deezer.py | 89 | 
1 files changed, 89 insertions, 0 deletions
# youtube_dl/extractor/deezer.py
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    orderedSet,
)


class DeezerPlaylistIE(InfoExtractor):
    """Extractor for Deezer playlist pages.

    Each track of the playlist is returned as an entry whose only format is
    the preview MP3 built from the page's ``SOUND_PREVIEW_GATEWAY`` pattern.
    """

    _VALID_URL = r'https?://(?:www\.)?deezer\.com/playlist/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.deezer.com/playlist/176747451',
        'info_dict': {
            'id': '176747451',
            'title': 'Best!',
            'uploader': 'Anonymous',
            # Raw string so that ``\.`` reaches the regex engine verbatim
            # instead of being an invalid string escape sequence.
            'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
        },
        'playlist_count': 30,
        'skip': 'Only available in .de',
    }

    def _real_extract(self, url):
        """Build the playlist info dict for a Deezer playlist URL.

        Downloads the playlist page, decodes the JSON blob passed to
        ``naboo.display(...)`` and turns every song in it into an entry with
        a single low-preference ``preview`` format.

        Returns a dict with ``_type`` ``'playlist'`` carrying the playlist
        id, title, uploader, thumbnail and the list of entries.

        Raises ExtractorError (expected) when the page contains a
        ``soon-txt`` paragraph, which is reported as Deezer's own message.
        Mandatory lookups (data JSON, preview URL pattern) raise through
        ``_search_regex`` when they do not match.
        """
        if 'test' not in self._downloader.params:
            self._downloader.report_warning(
                'For now, this extractor only supports the 30 second previews. Patches welcome!')

        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)
        geoblocking_msg = self._html_search_regex(
            r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message',
            default=None)
        if geoblocking_msg is not None:
            raise ExtractorError(
                'Deezer said: %s' % geoblocking_msg, expected=True)

        data_json = self._search_regex(
            r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n', webpage, 'data JSON')
        data = json.loads(data_json)

        # Look the metadata container up once; ``or {}`` also covers an
        # explicit JSON null, which ``.get('DATA', {})`` would not.
        playlist_data = data.get('DATA') or {}
        playlist_title = playlist_data.get('TITLE')
        playlist_uploader = playlist_data.get('PARENT_USERNAME')
        # The thumbnail is optional metadata: a missing image must not
        # abort extraction of the whole playlist.
        playlist_thumbnail = self._search_regex(
            r'<img id="naboo_playlist_image".*?src="([^"]+)"', webpage,
            'playlist thumbnail', fatal=False)

        # Every entry URL is derived from this pattern, so it is mandatory.
        # Looking it up non-fatally would leave ``None`` here and fail below
        # with an opaque AttributeError on ``.replace``.
        preview_pattern = self._search_regex(
            r"var SOUND_PREVIEW_GATEWAY\s*=\s*'([^']+)';", webpage,
            'preview URL pattern')

        entries = []
        for song in data['SONGS']['data']:
            puid = song['MD5_ORIGIN']
            # Placeholders: {0} = first character of the MD5, {1} = the
            # full MD5, {2} = the media version.
            preview_video_url = (
                preview_pattern
                .replace('{0}', puid[0])
                .replace('{1}', puid)
                .replace('{2}', song['MEDIA_VERSION']))
            formats = [{
                'format_id': 'preview',
                'url': preview_video_url,
                'preference': -100,  # Only the first 30 seconds
                'ext': 'mp3',
            }]
            self._sort_formats(formats)
            # orderedSet drops repeated artist names while keeping order.
            artists = ', '.join(
                orderedSet(a['ART_NAME'] for a in song['ARTISTS']))
            entries.append({
                'id': song['SNG_ID'],
                'duration': int_or_none(song.get('DURATION')),
                'title': '%s - %s' % (artists, song['SNG_TITLE']),
                # Optional fields: tolerate their absence.
                'uploader': song.get('ART_NAME'),
                'uploader_id': song.get('ART_ID'),
                'age_limit': 16 if song.get('EXPLICIT_LYRICS') == '1' else 0,
                'formats': formats,
            })

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'uploader': playlist_uploader,
            'thumbnail': playlist_thumbnail,
            'entries': entries,
        }
