diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/collegehumor.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/ro220.py | 19 | ||||
-rw-r--r-- | youtube_dl/extractor/spiegel.py | 37 | ||||
-rw-r--r-- | youtube_dl/extractor/ustream.py | 48 | ||||
-rw-r--r-- | youtube_dl/extractor/vevo.py | 39 |
5 files changed, 74 insertions, 71 deletions
diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index 026d023a0..1db27026a 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -38,7 +38,7 @@ class CollegeHumorIE(InfoExtractor): 'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', 'uploader': 'Funnyplox TV', 'uploader_id': 'funnyploxtv', - 'description': 'md5:b20fc87608e2837596bbc8df85a3c34d', + 'description': 'md5:7e8899d3f749db50fa089eb243cba17f', 'upload_date': '20140128', }, 'params': { diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py index c32f64d99..4678f62df 100644 --- a/youtube_dl/extractor/ro220.py +++ b/youtube_dl/extractor/ro220.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -11,12 +13,12 @@ class Ro220IE(InfoExtractor): IE_NAME = '220.ro' _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)' _TEST = { - u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", - u'file': u'LYV6doKo7f.mp4', - u'md5': u'03af18b73a07b4088753930db7a34add', - u'info_dict': { - u"title": u"Luati-le Banii sez 4 ep 1", - u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.", + "url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", + 'file': 'LYV6doKo7f.mp4', + 'md5': '03af18b73a07b4088753930db7a34add', + 'info_dict': { + "title": "Luati-le Banii sez 4 ep 1", + "description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.", } } @@ -27,10 +29,10 @@ class Ro220IE(InfoExtractor): webpage = self._download_webpage(url, video_id) flashVars_str = self._search_regex( r'<param name="flashVars" value="([^"]+)"', - webpage, u'flashVars') + webpage, 'flashVars') flashVars = compat_parse_qs(flashVars_str) - info = { + return { '_type': 'video', 'id': video_id, 'ext': 'mp4', @@ -39,4 +41,3 @@ class Ro220IE(InfoExtractor): 'description': clean_html(flashVars['desc'][0]), 'thumbnail': flashVars['preview'][0], } - return info diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 051a34d5b..9156d7faf 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -6,20 +8,20 @@ from .common import InfoExtractor class SpiegelIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' _TESTS = [{ - u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', - u'file': u'1259285.mp4', - u'md5': u'2c2754212136f35fb4b19767d242f66e', - u'info_dict': { - u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv" - } + 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', + 'file': '1259285.mp4', + 'md5': '2c2754212136f35fb4b19767d242f66e', + 'info_dict': { + 'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv', + }, }, { - u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', - u'file': u'1309159.mp4', - u'md5': u'f2cdf638d7aa47654e251e1aee360af1', - u'info_dict': { - u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers' - } + 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', + 'file': '1309159.mp4', + 'md5': 'f2cdf638d7aa47654e251e1aee360af1', + 'info_dict': { + 'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers', + }, }] def _real_extract(self, url): @@ -29,17 +31,17 @@ class SpiegelIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_title = self._html_search_regex( - r'<div class="module-title">(.*?)</div>', webpage, u'title') + r'<div class="module-title">(.*?)</div>', webpage, 'title') - xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml' + xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml' idoc = self._download_xml( xml_url, video_id, - note=u'Downloading XML', errnote=u'Failed to download XML') + note='Downloading XML', errnote='Failed to download XML') formats = [ { 'format_id': n.tag.rpartition('type')[2], - 'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text, + 'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text, 'width': int(n.find('./width').text), 'height': int(n.find('./height').text), 'abr': int(n.find('./audiobitrate').text), @@ -55,10 +57,9 @@ class SpiegelIE(InfoExtractor): self._sort_formats(formats) - info = { + return { 'id': video_id, 'title': video_title, 'duration': duration, 'formats': formats, } - return info diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 74c82587f..7fa2b9e15 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import json import re @@ -10,48 +12,48 @@ from ..utils import ( class UstreamIE(InfoExtractor): _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)' - IE_NAME = u'ustream' + IE_NAME = 'ustream' _TEST = { - u'url': u'http://www.ustream.tv/recorded/20274954', - u'file': u'20274954.flv', - u'md5': u'088f151799e8f572f84eb62f17d73e5c', - u'info_dict': { - u"uploader": u"Young Americans for Liberty", - u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM" - } + 'url': 'http://www.ustream.tv/recorded/20274954', + 'file': '20274954.flv', + 'md5': '088f151799e8f572f84eb62f17d73e5c', + 'info_dict': { + "uploader": "Young Americans for Liberty", + "title": "Young Americans for Liberty February 7, 2012 2:28 AM", + }, } def _real_extract(self, url): m = re.match(self._VALID_URL, url) video_id = m.group('videoID') - video_url = u'http://tcdn.ustream.tv/video/%s' % video_id + video_url = 'http://tcdn.ustream.tv/video/%s' % video_id webpage = self._download_webpage(url, video_id) self.report_extraction(video_id) video_title = self._html_search_regex(r'data-title="(?P<title>.+)"', - webpage, u'title') + webpage, 'title') uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>', - webpage, u'uploader', fatal=False, flags=re.DOTALL) + webpage, 'uploader', fatal=False, flags=re.DOTALL) thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"', - webpage, u'thumbnail', fatal=False) - - info = { - 'id': video_id, - 'url': video_url, - 'ext': 'flv', - 'title': video_title, - 'uploader': uploader, - 'thumbnail': thumbnail, - } - return info + webpage, 'thumbnail', fatal=False) + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'flv', + 'title': video_title, + 'uploader': uploader, + 'thumbnail': thumbnail, + } + class UstreamChannelIE(InfoExtractor): _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)' - IE_NAME = u'ustream:channel' + IE_NAME = 'ustream:channel' def _real_extract(self, url): m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index a4b26a26f..f0673972c 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json import xml.etree.ElementTree @@ -22,16 +24,16 @@ class VevoIE(InfoExtractor): vevo:) (?P<id>[^&?#]+)''' _TESTS = [{ - u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', - u'file': u'GB1101300280.mp4', - u"md5": u"06bea460acb744eab74a9d7dcb4bfd61", - u'info_dict': { - u"upload_date": u"20130624", - u"uploader": u"Hurts", - u"title": u"Somebody to Die For", - u"duration": 230.12, - u"width": 1920, - u"height": 1080, + 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', + 'file': 'GB1101300280.mp4', + "md5": "06bea460acb744eab74a9d7dcb4bfd61", + 'info_dict': { + "upload_date": "20130624", + "uploader": "Hurts", + "title": "Somebody to Die For", + "duration": 230.12, + "width": 1920, + "height": 1080, } }] _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' @@ -44,7 +46,7 @@ class VevoIE(InfoExtractor): if version['version'] > last_version['version']: last_version = version if last_version['version'] == -1: - raise ExtractorError(u'Unable to extract last version of the video') + raise ExtractorError('Unable to extract last version of the video') renditions = xml.etree.ElementTree.fromstring(last_version['data']) formats = [] @@ -85,7 +87,7 @@ class VevoIE(InfoExtractor): format_url = self._SMIL_BASE_URL + m.group('path') formats.append({ 'url': format_url, - 'format_id': u'SMIL_' + m.group('cbr'), + 'format_id': 'SMIL_' + m.group('cbr'), 'vcodec': m.group('vcodec'), 'acodec': m.group('acodec'), 'vbr': int(m.group('vbr')), @@ -101,26 +103,25 @@ class VevoIE(InfoExtractor): video_id = mobj.group('id') json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id - info_json = self._download_webpage(json_url, video_id, u'Downloading json info') - video_info = json.loads(info_json)['video'] + video_info = self._download_json(json_url, video_id)['video'] formats = self._formats_from_json(video_info) try: smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( self._SMIL_BASE_URL, video_id, video_id.lower()) smil_xml = self._download_webpage(smil_url, video_id, - u'Downloading SMIL info') + 'Downloading SMIL info') formats.extend(self._formats_from_smil(smil_xml)) except ExtractorError as ee: if not isinstance(ee.cause, compat_HTTPError): raise self._downloader.report_warning( - u'Cannot download SMIL information, falling back to JSON ..') + 'Cannot download SMIL information, falling back to JSON ..') timestamp_ms = int(self._search_regex( - r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date')) + r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date')) upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000) - info = { + return { 'id': video_id, 'title': video_info['title'], 'formats': formats, @@ -129,5 +130,3 @@ class VevoIE(InfoExtractor): 'uploader': video_info['mainArtists'][0]['artistName'], 'duration': video_info['duration'], } - - return info |