diff options
Diffstat (limited to 'youtube_dl/extractor/mixcloud.py')
-rw-r--r-- | youtube_dl/extractor/mixcloud.py | 48 |
1 files changed, 38 insertions, 10 deletions
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 0efbe660a..798968ae3 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -9,6 +9,7 @@ from .common import InfoExtractor from ..compat import ( compat_chr, compat_ord, + compat_str, compat_urllib_parse_unquote, compat_urlparse, ) @@ -53,16 +54,27 @@ class MixcloudIE(InfoExtractor): 'only_matching': True, }] - # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js - @staticmethod - def _decrypt_play_info(play_info): - KEY = 'pleasedontdownloadourmusictheartistswontgetpaid' + _keys = [ + 'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };', + 'pleasedontdownloadourmusictheartistswontgetpaid', + 'window.addEventListener = window.addEventListener || function() {};', + '(function() { return new Date().toLocaleDateString(); })()' + ] + _current_key = None + # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js + def _decrypt_play_info(self, play_info, video_id): play_info = base64.b64decode(play_info.encode('ascii')) - - return ''.join([ - compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)])) - for idx, ch in enumerate(play_info)]) + for num, key in enumerate(self._keys, start=1): + try: + return self._parse_json( + ''.join([ + compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)])) + for idx, ch in enumerate(play_info)]), + video_id) + except ExtractorError: + if num == len(self._keys): + raise def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -72,14 +84,30 @@ class MixcloudIE(InfoExtractor): webpage = self._download_webpage(url, track_id) + if not self._current_key: + js_url = self._search_regex( + r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)', + webpage, 'js url', default=None) + if js_url: + js = self._download_webpage(js_url, track_id, fatal=False) + if js: + KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1' + for key_name in ('value', 'key_value'): + key = self._search_regex( + KEY_RE_TEMPLATE % key_name, js, 'key', + default=None, group='key') + if key and isinstance(key, compat_str): + self._keys.insert(0, key) + self._current_key = key + message = self._html_search_regex( r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', webpage, 'error message', default=None) encrypted_play_info = self._search_regex( r'm-play-info="([^"]+)"', webpage, 'play info') - play_info = self._parse_json( - self._decrypt_play_info(encrypted_play_info), track_id) + + play_info = self._decrypt_play_info(encrypted_play_info, track_id) if message and 'stream_url' not in play_info: raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) |