about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- test/test_all_urls.py 3
-rw-r--r-- youtube_dl/YoutubeDL.py 2
-rw-r--r-- youtube_dl/extractor/__init__.py 1
-rw-r--r-- youtube_dl/extractor/c56.py 29
-rw-r--r-- youtube_dl/extractor/cnet.py 70
-rw-r--r-- youtube_dl/extractor/comedycentral.py 2
-rw-r--r-- youtube_dl/extractor/wimp.py 27
-rw-r--r-- youtube_dl/extractor/youtube.py 2
-rw-r--r-- youtube_dl/utils.py 8
-rw-r--r-- youtube_dl/version.py 2
10 files changed, 124 insertions, 22 deletions
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index ed041ffda..bea8c41fb 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -153,6 +153,9 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch(
'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
['ComedyCentralShows'])
+ self.assertMatch(
+ 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
+ ['ComedyCentralShows'])
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 430773edd..5794fdbe9 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -876,7 +876,7 @@ class YoutubeDL(object):
try:
dn = os.path.dirname(encodeFilename(filename))
- if dn != '' and not os.path.exists(dn):
+ if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
self.report_error('unable to create directory ' + compat_str(err))
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 7c3587e47..c9c400b61 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -40,6 +40,7 @@ from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE
from .cmt import CMTIE
+from .cnet import CNETIE
from .cnn import (
CNNIE,
CNNBlogsIE,
diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py
index 690bc7c25..cb96c3876 100644
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@@ -2,39 +2,46 @@
from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
class C56IE(InfoExtractor):
- _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
+ _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
IE_NAME = '56.com'
_TEST = {
'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
- 'file': '93440716.flv',
'md5': 'e59995ac63d0457783ea05f93f12a866',
'info_dict': {
+ 'id': '93440716',
+ 'ext': 'flv',
'title': '网事知多少 第32期:车怒',
+ 'duration': 283.813,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
text_id = mobj.group('textid')
- info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
- text_id, 'Downloading video info')
- info = json.loads(info_page)['info']
- formats = [{
- 'format_id': f['type'],
- 'filesize': int(f['filesize']),
- 'url': f['url']
- } for f in info['rfiles']]
+
+ page = self._download_json(
+ 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
+
+ info = page['info']
+
+ formats = [
+ {
+ 'format_id': f['type'],
+ 'filesize': int(f['filesize']),
+ 'url': f['url']
+ } for f in info['rfiles']
+ ]
self._sort_formats(formats)
return {
'id': info['vid'],
'title': info['Subject'],
+ 'duration': int(info['duration']) / 1000.0,
'formats': formats,
'thumbnail': info.get('bimg') or info.get('img'),
}
diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py
new file mode 100644
index 000000000..6a2f5ce36
--- /dev/null
+++ b/youtube_dl/extractor/cnet.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+)
+
+
+class CNETIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
+ _TEST = {
+ 'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
+ 'md5': '041233212a0d06b179c87cbcca1577b8',
+ 'info_dict': {
+ 'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
+ 'ext': 'mp4',
+ 'title': 'Hands-on with Microsoft Windows 8.1 Update',
+ 'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
+ 'thumbnail': 're:^http://.*/flmswindows8.jpg$',
+ 'uploader_id': 'sarah.mitroff@cbsinteractive.com',
+ 'uploader': 'Sarah Mitroff',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, display_id)
+ data_json = self._html_search_regex(
+ r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
+ webpage, 'data json')
+ data = json.loads(data_json)
+ vdata = data['video']
+
+ video_id = vdata['id']
+ title = vdata['headline']
+ description = vdata.get('dek')
+ thumbnail = vdata.get('image', {}).get('path')
+ author = vdata.get('author')
+ if author:
+ uploader = '%s %s' % (author['firstName'], author['lastName'])
+ uploader_id = author.get('email')
+ else:
+ uploader = None
+ uploader_id = None
+
+ formats = [{
+ 'format_id': '%s-%s-%s' % (
+ f['type'], f['format'],
+ int_or_none(f.get('bitrate'), 1000, default='')),
+ 'url': f['uri'],
+ 'tbr': int_or_none(f.get('bitrate'), 1000),
+ } for f in vdata['files']['data']]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index 38577243c..0c99887a2 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -41,7 +41,7 @@ class ComedyCentralShowsIE(InfoExtractor):
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|https?://(:www\.)?
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
- (full-episodes/(?P<episode>.*)|
+ (full-episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
(?P<clip>
(?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+))
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
index 79fd53e0c..c27dda944 100644
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -3,11 +3,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from .youtube import YoutubeIE
class WimpIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.wimp.com/maruexhausted/',
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
'info_dict': {
@@ -16,7 +17,20 @@ class WimpIE(InfoExtractor):
'title': 'Maru is exhausted.',
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
}
- }
+ }, {
+ # youtube video
+ 'url': 'http://www.wimp.com/clowncar/',
+ 'info_dict': {
+ 'id': 'cG4CEr2aiSg',
+ 'ext': 'mp4',
+ 'title': 'Basset hound clown car...incredible!',
+ 'description': 'md5:8d228485e0719898c017203f900b3a35',
+ 'uploader': 'Gretchen Hoey',
+ 'uploader_id': 'gretchenandjeff1',
+ 'upload_date': '20140303',
+ },
+ 'add_ie': ['Youtube'],
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -24,6 +38,13 @@ class WimpIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL')
+ if YoutubeIE.suitable(video_url):
+ self.to_screen('Found YouTube video')
+ return {
+ '_type': 'url',
+ 'url': video_url,
+ 'ie_key': YoutubeIE.ie_key(),
+ }
return {
'id': video_id,
@@ -31,4 +52,4 @@ class WimpIE(InfoExtractor):
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
- } \ No newline at end of file
+ }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b6a1884b5..e422d86e4 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1753,7 +1753,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
- IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+ IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
_FEED_NAME = 'subscriptions'
_PLAYLIST_TITLE = u'Youtube Subscriptions'
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e54ea9d61..5f1f664c8 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1176,12 +1176,12 @@ class HEADRequest(compat_urllib_request.Request):
return "HEAD"
-def int_or_none(v, scale=1):
- return v if v is None else (int(v) // scale)
+def int_or_none(v, scale=1, default=None):
+ return default if v is None else (int(v) // scale)
-def float_or_none(v, scale=1):
- return v if v is None else (float(v) / scale)
+def float_or_none(v, scale=1, default=None):
+ return default if v is None else (float(v) / scale)
def parse_duration(s):
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index ea24d76e6..ac34122d9 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.04.03.1'
+__version__ = '2014.04.03.3'