diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2014-04-03 16:21:21 +0200 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2014-04-03 16:21:21 +0200 |
commit | 9271bc835546a8bd11c645018e9daabd54522855 (patch) | |
tree | 033032c696aca96b210205e44e76c56bc557477e /youtube_dl/extractor | |
parent | 968ed2a7779fc7337fdcaa12da2b12e288e7eeb6 (diff) |
[cnet] Add new extractor (Fixes #2679)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/cnet.py | 70 |
2 files changed, 71 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 7c3587e47..c9c400b61 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -40,6 +40,7 @@ from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
 from .cmt import CMTIE
+from .cnet import CNETIE
 from .cnn import (
     CNNIE,
     CNNBlogsIE,
diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py
new file mode 100644
index 000000000..6a2f5ce36
--- /dev/null
+++ b/youtube_dl/extractor/cnet.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+)
+
+
+class CNETIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
+    _TEST = {
+        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
+        'md5': '041233212a0d06b179c87cbcca1577b8',
+        'info_dict': {
+            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
+            'ext': 'mp4',
+            'title': 'Hands-on with Microsoft Windows 8.1 Update',
+            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
+            'thumbnail': 're:^http://.*/flmswindows8.jpg$',
+            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
+            'uploader': 'Sarah Mitroff',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, display_id)
+        data_json = self._html_search_regex(
+            r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
+            webpage, 'data json')
+        data = json.loads(data_json)
+        vdata = data['video']
+
+        video_id = vdata['id']
+        title = vdata['headline']
+        description = vdata.get('dek')
+        thumbnail = vdata.get('image', {}).get('path')
+        author = vdata.get('author')
+        if author:
+            uploader = '%s %s' % (author['firstName'], author['lastName'])
+            uploader_id = author.get('email')
+        else:
+            uploader = None
+            uploader_id = None
+
+        formats = [{
+            'format_id': '%s-%s-%s' % (
+                f['type'], f['format'],
+                int_or_none(f.get('bitrate'), 1000, default='')),
+            'url': f['uri'],
+            'tbr': int_or_none(f.get('bitrate'), 1000),
+        } for f in vdata['files']['data']]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'thumbnail': thumbnail,
+        }
```