diff options
-rw-r--r-- | test/test_all_urls.py | 3 | ||||
-rw-r--r-- | youtube_dl/YoutubeDL.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/c56.py | 29 | ||||
-rw-r--r-- | youtube_dl/extractor/cnet.py | 70 | ||||
-rw-r--r-- | youtube_dl/extractor/comedycentral.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/wimp.py | 27 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 2 | ||||
-rw-r--r-- | youtube_dl/utils.py | 8 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
10 files changed, 124 insertions, 22 deletions
diff --git a/test/test_all_urls.py b/test/test_all_urls.py index ed041ffda..bea8c41fb 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -153,6 +153,9 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch( 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', ['ComedyCentralShows']) + self.assertMatch( + 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', + ['ComedyCentralShows']) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 430773edd..5794fdbe9 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -876,7 +876,7 @@ class YoutubeDL(object): try: dn = os.path.dirname(encodeFilename(filename)) - if dn != '' and not os.path.exists(dn): + if dn and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: self.report_error('unable to create directory ' + compat_str(err)) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7c3587e47..c9c400b61 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -40,6 +40,7 @@ from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .clipsyndicate import ClipsyndicateIE from .cmt import CMTIE +from .cnet import CNETIE from .cnn import ( CNNIE, CNNBlogsIE, diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py index 690bc7c25..cb96c3876 100644 --- a/youtube_dl/extractor/c56.py +++ b/youtube_dl/extractor/c56.py @@ -2,39 +2,46 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor class C56IE(InfoExtractor): - _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)' + _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)' IE_NAME = '56.com' _TEST = { 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html', - 'file': '93440716.flv', 'md5': 'e59995ac63d0457783ea05f93f12a866', 'info_dict': { + 'id': '93440716', + 'ext': 'flv', 'title': '网事知多少 第32期:车怒', + 'duration': 283.813, }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) text_id = mobj.group('textid') - info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id, - text_id, 'Downloading video info') - info = json.loads(info_page)['info'] - formats = [{ - 'format_id': f['type'], - 'filesize': int(f['filesize']), - 'url': f['url'] - } for f in info['rfiles']] + + page = self._download_json( + 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info') + + info = page['info'] + + formats = [ + { + 'format_id': f['type'], + 'filesize': int(f['filesize']), + 'url': f['url'] + } for f in info['rfiles'] + ] self._sort_formats(formats) return { 'id': info['vid'], 'title': info['Subject'], + 'duration': int(info['duration']) / 1000.0, 'formats': formats, 'thumbnail': info.get('bimg') or info.get('img'), } diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py new file mode 100644 index 000000000..6a2f5ce36 --- /dev/null +++ b/youtube_dl/extractor/cnet.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, +) + + +class CNETIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/' + _TEST = { + 'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/', + 'md5': '041233212a0d06b179c87cbcca1577b8', + 'info_dict': { + 'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60', + 'ext': 'mp4', + 'title': 'Hands-on with Microsoft Windows 8.1 Update', + 'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.', + 'thumbnail': 're:^http://.*/flmswindows8.jpg$', + 'uploader_id': 'sarah.mitroff@cbsinteractive.com', + 'uploader': 'Sarah Mitroff', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + data_json = self._html_search_regex( + r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'", + webpage, 'data json') + data = json.loads(data_json) + vdata = data['video'] + + video_id = vdata['id'] + title = vdata['headline'] + description = vdata.get('dek') + thumbnail = vdata.get('image', {}).get('path') + author = vdata.get('author') + if author: + uploader = '%s %s' % (author['firstName'], author['lastName']) + uploader_id = author.get('email') + else: + uploader = None + uploader_id = None + + formats = [{ + 'format_id': '%s-%s-%s' % ( + f['type'], f['format'], + int_or_none(f.get('bitrate'), 1000, default='')), + 'url': f['uri'], + 'tbr': int_or_none(f.get('bitrate'), 1000), + } for f in vdata['files']['data']] + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 38577243c..0c99887a2 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -41,7 +41,7 @@ class ComedyCentralShowsIE(InfoExtractor): _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport) |https?://(:www\.)? (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ - (full-episodes/(?P<episode>.*)| + (full-episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| (?P<clip> (?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+)) |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 79fd53e0c..c27dda944 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -3,11 +3,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from .youtube import YoutubeIE class WimpIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/' - _TEST = { + _TESTS = [{ 'url': 'http://www.wimp.com/maruexhausted/', 'md5': 'f1acced123ecb28d9bb79f2479f2b6a1', 'info_dict': { @@ -16,7 +17,20 @@ class WimpIE(InfoExtractor): 'title': 'Maru is exhausted.', 'description': 'md5:57e099e857c0a4ea312542b684a869b8', } - } + }, { + # youtube video + 'url': 'http://www.wimp.com/clowncar/', + 'info_dict': { + 'id': 'cG4CEr2aiSg', + 'ext': 'mp4', + 'title': 'Basset hound clown car...incredible!', + 'description': 'md5:8d228485e0719898c017203f900b3a35', + 'uploader': 'Gretchen Hoey', + 'uploader_id': 'gretchenandjeff1', + 'upload_date': '20140303', + }, + 'add_ie': ['Youtube'], + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -24,6 +38,13 @@ class WimpIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL') + if YoutubeIE.suitable(video_url): + self.to_screen('Found YouTube video') + return { + '_type': 'url', + 'url': video_url, + 'ie_key': YoutubeIE.ie_key(), + } return { 'id': video_id, @@ -31,4 +52,4 @@ class WimpIE(InfoExtractor): 'title': self._og_search_title(webpage), 'thumbnail': self._og_search_thumbnail(webpage), 'description': self._og_search_description(webpage), - }
\ No newline at end of file + } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b6a1884b5..e422d86e4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1753,7 +1753,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): - IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)' + IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' _FEED_NAME = 'subscriptions' _PLAYLIST_TITLE = u'Youtube Subscriptions' diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e54ea9d61..5f1f664c8 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1176,12 +1176,12 @@ class HEADRequest(compat_urllib_request.Request): return "HEAD" -def int_or_none(v, scale=1): - return v if v is None else (int(v) // scale) +def int_or_none(v, scale=1, default=None): + return default if v is None else (int(v) // scale) -def float_or_none(v, scale=1): - return v if v is None else (float(v) / scale) +def float_or_none(v, scale=1, default=None): + return default if v is None else (float(v) / scale) def parse_duration(s): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ea24d76e6..ac34122d9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.04.03.1' +__version__ = '2014.04.03.3' |