diff options
Diffstat (limited to 'youtube_dl/extractor/mixcloud.py')
| -rw-r--r-- | youtube_dl/extractor/mixcloud.py | 72 | 
1 files changed, 15 insertions, 57 deletions
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 84f291558..425a4ccf1 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals
 
 import re
-import itertools
 
 from .common import InfoExtractor
 from ..compat import (
@@ -46,20 +45,16 @@ class MixcloudIE(InfoExtractor):
         },
     }]
 
-    def _get_url(self, track_id, template_url, server_number):
-        boundaries = (1, 30)
-        for nr in server_numbers(server_number, boundaries):
-            url = template_url % nr
-            try:
-                # We only want to know if the request succeed
-                # don't download the whole file
-                self._request_webpage(
-                    HEADRequest(url), track_id,
-                    'Checking URL %d/%d ...' % (nr, boundaries[-1]))
-                return url
-            except ExtractorError:
-                pass
-        return None
+    def _check_url(self, url, track_id, ext):
+        try:
+            # We only want to know if the request succeed
+            # don't download the whole file
+            self._request_webpage(
+                HEADRequest(url), track_id,
+                'Trying %s URL' % ext)
+            return True
+        except ExtractorError:
+            return False
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -72,15 +67,10 @@ class MixcloudIE(InfoExtractor):
         preview_url = self._search_regex(
             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
-        server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number'))
-        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
-        final_song_url = self._get_url(track_id, template_url, server_number)
-        if final_song_url is None:
-            self.to_screen('Trying with m4a extension')
-            template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
-            final_song_url = self._get_url(track_id, template_url, server_number)
-        if final_song_url is None:
-            raise ExtractorError('Unable to extract track url')
+        if not self._check_url(song_url, track_id, 'mp3'):
+            song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
+            if not self._check_url(song_url, track_id, 'm4a'):
+                raise ExtractorError('Unable to extract track url')
 
         PREFIX = (
             r'm-play-on-spacebar[^>]+'
@@ -107,7 +97,7 @@ class MixcloudIE(InfoExtractor):
         return {
             'id': track_id,
             'title': title,
-            'url': final_song_url,
+            'url': song_url,
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
@@ -115,35 +105,3 @@ class MixcloudIE(InfoExtractor):
             'view_count': view_count,
             'like_count': like_count,
         }
-
-
-def server_numbers(first, boundaries):
-    """ Server numbers to try in descending order of probable availability.
-    Starting from first (i.e. the number of the server hosting the preview file)
-    and going further and further up to the higher boundary and down to the
-    lower one in an alternating fashion. Namely:
-
-        server_numbers(2, (1, 5))
-
-        # Where the preview server is 2, min number is 1 and max is 5.
-        # Yields: 2, 3, 1, 4, 5
-
-    Why not random numbers or increasing sequences? Since from what I've seen,
-    full length files seem to be hosted on servers whose number is closer to
-    that of the preview; to be confirmed.
-    """
-    zip_longest = getattr(itertools, 'zip_longest', None)
-    if zip_longest is None:
-        # python 2.x
-        zip_longest = itertools.izip_longest
-
-    if len(boundaries) != 2:
-        raise ValueError("boundaries should be a two-element tuple")
-    min, max = boundaries
-    highs = range(first + 1, max + 1)
-    lows = range(first - 1, min - 1, -1)
-    rest = filter(
-        None, itertools.chain.from_iterable(zip_longest(highs, lows)))
-    yield first
-    for n in rest:
-        yield n
