diff options
-rw-r--r-- | test/helper.py | 2 | ||||
-rw-r--r-- | test/test_playlists.py | 8 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 19 | ||||
-rw-r--r-- | youtube_dl/extractor/mixcloud.py | 52 | ||||
-rw-r--r-- | youtube_dl/extractor/videott.py | 58 | ||||
-rw-r--r-- | youtube_dl/extractor/vine.py | 31 | ||||
-rw-r--r-- | youtube_dl/extractor/wdr.py | 28 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 2 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
10 files changed, 188 insertions, 21 deletions
diff --git a/test/helper.py b/test/helper.py index d5e0a603e..230d2bd67 100644 --- a/test/helper.py +++ b/test/helper.py @@ -107,7 +107,7 @@ def expect_info_dict(self, expected_dict, got_dict): elif isinstance(expected, type): got = got_dict.get(info_field) self.assertTrue(isinstance(got, expected), - u'Expected type %r, but got value %r of type %r' % (expected, got, type(got))) + u'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got))) else: if isinstance(expected, compat_str) and expected.startswith('md5:'): got = 'md5:' + md5(got_dict.get(info_field)) diff --git a/test/test_playlists.py b/test/test_playlists.py index fe6274e60..cc871698a 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -23,6 +23,7 @@ from youtube_dl.extractor import ( VimeoUserIE, VimeoAlbumIE, VimeoGroupsIE, + VineUserIE, UstreamChannelIE, SoundcloudSetIE, SoundcloudUserIE, @@ -102,6 +103,13 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['title'], 'Rolex Awards for Enterprise') self.assertTrue(len(result['entries']) > 72) + def test_vine_user(self): + dl = FakeYDL() + ie = VineUserIE(dl) + result = ie.extract('https://vine.co/Visa') + self.assertIsPlaylist(result) + self.assertTrue(len(result['entries']) >= 50) + def test_ustream_channel(self): dl = FakeYDL() ie = UstreamChannelIE(dl) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9529077c7..cfe17a268 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -295,6 +295,7 @@ from .videodetective import VideoDetectiveIE from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE from .videopremium import VideoPremiumIE +from .videott import VideoTtIE from .videoweed import VideoWeedIE from .vimeo import ( VimeoIE, @@ -305,7 +306,10 @@ from .vimeo import ( VimeoReviewIE, VimeoWatchLaterIE, ) -from .vine import VineIE +from .vine import ( + VineIE, + VineUserIE, +) from .viki import VikiIE from .vk import VKIE from .vube import VubeIE @@ -314,6 +318,7 @@ from .washingtonpost import WashingtonPostIE from .wat import WatIE from .wdr import ( WDRIE, + WDRMobileIE, WDRMausIE, ) from .weibo import WeiboIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 10b0cbe69..11b31db88 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -242,10 +242,11 @@ class InfoExtractor(object): url = url_or_request.get_full_url() except AttributeError: url = url_or_request - if len(url) > 200: - h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest() - url = url[:200 - len(h)] + h - raw_filename = ('%s_%s.dump' % (video_id, url)) + basen = video_id + '_' + url + if len(basen) > 240: + h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest() + basen = basen[:240 - len(h)] + h + raw_filename = basen + '.dump' filename = sanitize_filename(raw_filename, restricted=True) self.to_screen(u'Saving request to ' + filename) with open(filename, 'wb') as outf: @@ -555,6 +556,16 @@ class InfoExtractor(object): if self._downloader.params.get('prefer_insecure', False) else 'https:') + def _proto_relative_url(self, url, scheme=None): + if url is None: + return url + if url.startswith('//'): + if scheme is None: + scheme = self.http_scheme() + return scheme + url + else: + return url + class SearchInfoExtractor(InfoExtractor): """ diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index c4bd53fe7..5f64e7bd0 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -4,9 +4,10 @@ import re from .common import InfoExtractor from ..utils import ( - unified_strdate, compat_urllib_parse, ExtractorError, + int_or_none, + parse_iso8601, ) @@ -24,6 +25,10 @@ class MixcloudIE(InfoExtractor): 'uploader': 'Daniel Holbach', 'uploader_id': 'dholbach', 'upload_date': '20111115', + 'timestamp': 1321359578, + 'thumbnail': 're:https?://.*\.jpg', + 'view_count': int, + 'like_count': int, }, } @@ -51,10 +56,6 @@ class MixcloudIE(InfoExtractor): webpage = self._download_webpage(url, track_id) - api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name) - info = self._download_json( - api_url, track_id, 'Downloading cloudcast info') - preview_url = self._search_regex( r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url') song_url = preview_url.replace('/previews/', '/c/originals/') @@ -65,16 +66,41 @@ class MixcloudIE(InfoExtractor): template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') final_song_url = self._get_url(template_url) if final_song_url is None: - raise ExtractorError(u'Unable to extract track url') + raise ExtractorError('Unable to extract track url') + + PREFIX = ( + r'<div class="cloudcast-play-button-container"' + r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') + title = self._html_search_regex( + PREFIX + r'm-title="([^"]+)"', webpage, 'title') + thumbnail = self._proto_relative_url(self._html_search_regex( + PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', + fatal=False)) + uploader = self._html_search_regex( + PREFIX + r'm-owner-name="([^"]+)"', + webpage, 'uploader', fatal=False) + uploader_id = self._search_regex( + r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) + description = self._og_search_description(webpage) + like_count = int_or_none(self._search_regex( + r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"', + webpage, 'like count', fatal=False)) + view_count = int_or_none(self._search_regex( + r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', + webpage, 'play count', fatal=False)) + timestamp = parse_iso8601(self._search_regex( + r'<time itemprop="dateCreated" datetime="([^"]+)">', + webpage, 'upload date')) return { 'id': track_id, - 'title': info['name'], + 'title': title, 'url': final_song_url, - 'description': info.get('description'), - 'thumbnail': info['pictures'].get('extra_large'), - 'uploader': info['user']['name'], - 'uploader_id': info['user']['username'], - 'upload_date': unified_strdate(info['created_time']), - 'view_count': info['play_count'], + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'timestamp': timestamp, + 'view_count': view_count, + 'like_count': like_count, } diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py new file mode 100644 index 000000000..b5034b02f --- /dev/null +++ b/youtube_dl/extractor/videott.py @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +import re +import base64 + +from .common import InfoExtractor +from ..utils import unified_strdate + + +class VideoTtIE(InfoExtractor): + ID_NAME = 'video.tt' + IE_DESC = 'video.tt - Your True Tube' + _VALID_URL = r'http://(?:www\.)?video\.tt/(?:video/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})' + + _TEST = { + 'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8', + 'md5': 'b13aa9e2f267effb5d1094443dff65ba', + 'info_dict': { + 'id': 'amd5YujV8', + 'ext': 'flv', + 'title': 'Motivational video Change your mind in just 2.50 mins', + 'description': '', + 'upload_date': '20130827', + 'uploader': 'joseph313', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + settings = self._download_json( + 'http://www.video.tt/player_control/settings.php?v=%s' % video_id, video_id, + 'Downloading video JSON')['settings'] + + video = settings['video_details']['video'] + + formats = [ + { + 'url': base64.b64decode(res['u']).decode('utf-8'), + 'ext': 'flv', + 'format_id': res['l'], + } for res in settings['res'] if res['u'] + ] + + return { + 'id': video_id, + 'title': video['title'], + 'description': video['description'], + 'thumbnail': settings['config']['thumbnail'], + 'upload_date': unified_strdate(video['added']), + 'uploader': video['owner'], + 'view_count': int(video['view_count']), + 'comment_count': int(video['comment_count']), + 'like_count': int(video['liked']), + 'dislike_count': int(video['disliked']), + 'formats': formats, + }
\ No newline at end of file diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 5136ec466..076c87119 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re import json +import itertools from .common import InfoExtractor from ..utils import unified_strdate @@ -58,3 +59,33 @@ class VineIE(InfoExtractor): 'repost_count': data['reposts']['count'], 'formats': formats, } + + +class VineUserIE(InfoExtractor): + IE_NAME = 'vine:user' + _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$' + _VINE_BASE_URL = "https://vine.co/" + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user = mobj.group('user') + + profile_url = "%sapi/users/profiles/vanity/%s" % ( + self._VINE_BASE_URL, user) + profile_data = self._download_json( + profile_url, user, note='Downloading user profile data') + + user_id = profile_data['data']['userId'] + timeline_data = [] + for pagenum in itertools.count(1): + timeline_url = "%sapi/timelines/users/%s?page=%s" % ( + self._VINE_BASE_URL, user_id, pagenum) + timeline_page = self._download_json( + timeline_url, user, note='Downloading page %d' % pagenum) + timeline_data.extend(timeline_page['data']['records']) + if timeline_page['data']['nextPage'] is None: + break + + entries = [ + self.url_result(e['permalinkUrl'], 'Vine') for e in timeline_data] + return self.playlist_result(entries, user) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 63691aa67..feeb44b45 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -115,6 +115,34 @@ class WDRIE(InfoExtractor): } +class WDRMobileIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://mobile-ondemand\.wdr\.de/ + .*?/fsk(?P<age_limit>[0-9]+) + /[0-9]+/[0-9]+/ + (?P<id>[0-9]+)_(?P<title>[0-9]+)''' + IE_NAME = 'wdr:mobile' + _TEST = { + 'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4', + 'info_dict': { + 'title': '4283021', + 'id': '421735', + 'age_limit': 0, + }, + '_skip': 'Will be depublicized shortly' + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + return { + 'id': mobj.group('id'), + 'title': mobj.group('title'), + 'age_limit': int(mobj.group('age_limit')), + 'url': url, + 'user_agent': 'mobile', + } + + class WDRMausIE(InfoExtractor): _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))' IE_DESC = 'Sendung mit der Maus' diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 67845349e..1f3aa4322 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1437,7 +1437,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): more_widget_html = content_html = page # Check if the playlist exists or is private - if re.search(r'<div class="yt-alert-message">[^<]*?The playlist does not exist[^<]*?</div>', page) is not None: + if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None: raise ExtractorError( u'The playlist doesn\'t exist or is private, use --username or ' '--netrc to access it.', diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d6d606913..89a2f72dc 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.05.05' +__version__ = '2014.05.12' |