diff options
Diffstat (limited to 'youtube_dl/extractor/mixcloud.py')
| -rw-r--r-- | youtube_dl/extractor/mixcloud.py | 52 | 
1 files changed, 39 insertions, 13 deletions
| diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index c4bd53fe7..5f64e7bd0 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -4,9 +4,10 @@ import re  from .common import InfoExtractor  from ..utils import ( -    unified_strdate,      compat_urllib_parse,      ExtractorError, +    int_or_none, +    parse_iso8601,  ) @@ -24,6 +25,10 @@ class MixcloudIE(InfoExtractor):              'uploader': 'Daniel Holbach',              'uploader_id': 'dholbach',              'upload_date': '20111115', +            'timestamp': 1321359578, +            'thumbnail': 're:https?://.*\.jpg', +            'view_count': int, +            'like_count': int,          },      } @@ -51,10 +56,6 @@ class MixcloudIE(InfoExtractor):          webpage = self._download_webpage(url, track_id) -        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name) -        info = self._download_json( -            api_url, track_id, 'Downloading cloudcast info') -          preview_url = self._search_regex(              r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')          song_url = preview_url.replace('/previews/', '/c/originals/') @@ -65,16 +66,41 @@ class MixcloudIE(InfoExtractor):              template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')              final_song_url = self._get_url(template_url)          if final_song_url is None: -            raise ExtractorError(u'Unable to extract track url') +            raise ExtractorError('Unable to extract track url') + +        PREFIX = ( +            r'<div class="cloudcast-play-button-container"' +            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') +        title = self._html_search_regex( +            PREFIX + r'm-title="([^"]+)"', webpage, 'title') +        thumbnail = self._proto_relative_url(self._html_search_regex( +            PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', +            fatal=False)) +        uploader = self._html_search_regex( +            PREFIX + r'm-owner-name="([^"]+)"', +            webpage, 'uploader', fatal=False) +        uploader_id = self._search_regex( +            r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) +        description = self._og_search_description(webpage) +        like_count = int_or_none(self._search_regex( +            r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"', +            webpage, 'like count', fatal=False)) +        view_count = int_or_none(self._search_regex( +            r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', +            webpage, 'play count', fatal=False)) +        timestamp = parse_iso8601(self._search_regex( +            r'<time itemprop="dateCreated" datetime="([^"]+)">', +            webpage, 'upload date'))          return {              'id': track_id, -            'title': info['name'], +            'title': title,              'url': final_song_url, -            'description': info.get('description'), -            'thumbnail': info['pictures'].get('extra_large'), -            'uploader': info['user']['name'], -            'uploader_id': info['user']['username'], -            'upload_date': unified_strdate(info['created_time']), -            'view_count': info['play_count'], +            'description': description, +            'thumbnail': thumbnail, +            'uploader': uploader, +            'uploader_id': uploader_id, +            'timestamp': timestamp, +            'view_count': view_count, +            'like_count': like_count,          } | 
