about summary refs log tree commit diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2014-05-13 09:42:38 +0200
committer Philipp Hagemeister <phihag@phihag.de> 2014-05-13 09:42:38 +0200
commit 57c7411f4615cda595f06fa461719b4336d808f4 (patch)
tree 900d34b7d2e237b490fd517ed7ea29d897d5f942 /youtube_dl/extractor
parent d0a122348e56c68dad1f34d67103ab3a353f602a (diff)
[mixcloud] Shed API dependency (#2904)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/common.py 10
-rw-r--r-- youtube_dl/extractor/mixcloud.py 52
2 files changed, 49 insertions, 13 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index cef8c523c..11b31db88 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -556,6 +556,16 @@ class InfoExtractor(object):
if self._downloader.params.get('prefer_insecure', False)
else 'https:')
+ def _proto_relative_url(self, url, scheme=None):
+ if url is None:
+ return url
+ if url.startswith('//'):
+ if scheme is None:
+ scheme = self.http_scheme()
+ return scheme + url
+ else:
+ return url
+
class SearchInfoExtractor(InfoExtractor):
"""
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index c4bd53fe7..5f64e7bd0 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -4,9 +4,10 @@ import re
from .common import InfoExtractor
from ..utils import (
- unified_strdate,
compat_urllib_parse,
ExtractorError,
+ int_or_none,
+ parse_iso8601,
)
@@ -24,6 +25,10 @@ class MixcloudIE(InfoExtractor):
'uploader': 'Daniel Holbach',
'uploader_id': 'dholbach',
'upload_date': '20111115',
+ 'timestamp': 1321359578,
+ 'thumbnail': 're:https?://.*\.jpg',
+ 'view_count': int,
+ 'like_count': int,
},
}
@@ -51,10 +56,6 @@ class MixcloudIE(InfoExtractor):
webpage = self._download_webpage(url, track_id)
- api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
- info = self._download_json(
- api_url, track_id, 'Downloading cloudcast info')
-
preview_url = self._search_regex(
r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
song_url = preview_url.replace('/previews/', '/c/originals/')
@@ -65,16 +66,41 @@ class MixcloudIE(InfoExtractor):
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
final_song_url = self._get_url(template_url)
if final_song_url is None:
- raise ExtractorError(u'Unable to extract track url')
+ raise ExtractorError('Unable to extract track url')
+
+ PREFIX = (
+ r'<div class="cloudcast-play-button-container"'
+ r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
+ title = self._html_search_regex(
+ PREFIX + r'm-title="([^"]+)"', webpage, 'title')
+ thumbnail = self._proto_relative_url(self._html_search_regex(
+ PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
+ fatal=False))
+ uploader = self._html_search_regex(
+ PREFIX + r'm-owner-name="([^"]+)"',
+ webpage, 'uploader', fatal=False)
+ uploader_id = self._search_regex(
+ r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
+ description = self._og_search_description(webpage)
+ like_count = int_or_none(self._search_regex(
+ r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
+ webpage, 'like count', fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
+ webpage, 'play count', fatal=False))
+ timestamp = parse_iso8601(self._search_regex(
+ r'<time itemprop="dateCreated" datetime="([^"]+)">',
+ webpage, 'upload date'))
return {
'id': track_id,
- 'title': info['name'],
+ 'title': title,
'url': final_song_url,
- 'description': info.get('description'),
- 'thumbnail': info['pictures'].get('extra_large'),
- 'uploader': info['user']['name'],
- 'uploader_id': info['user']['username'],
- 'upload_date': unified_strdate(info['created_time']),
- 'view_count': info['play_count'],
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'like_count': like_count,
}