about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/collegehumor.py 2
-rw-r--r-- youtube_dl/extractor/ro220.py 19
-rw-r--r-- youtube_dl/extractor/spiegel.py 37
-rw-r--r-- youtube_dl/extractor/ustream.py 48
-rw-r--r-- youtube_dl/extractor/vevo.py 39
5 files changed, 74 insertions, 71 deletions
diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py
index 026d023a0..1db27026a 100644
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -38,7 +38,7 @@ class CollegeHumorIE(InfoExtractor):
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
'uploader': 'Funnyplox TV',
'uploader_id': 'funnyploxtv',
- 'description': 'md5:b20fc87608e2837596bbc8df85a3c34d',
+ 'description': 'md5:7e8899d3f749db50fa089eb243cba17f',
'upload_date': '20140128',
},
'params': {
diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py
index c32f64d99..4678f62df 100644
--- a/youtube_dl/extractor/ro220.py
+++ b/youtube_dl/extractor/ro220.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
@@ -11,12 +13,12 @@ class Ro220IE(InfoExtractor):
IE_NAME = '220.ro'
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
_TEST = {
- u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
- u'file': u'LYV6doKo7f.mp4',
- u'md5': u'03af18b73a07b4088753930db7a34add',
- u'info_dict': {
- u"title": u"Luati-le Banii sez 4 ep 1",
- u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+ "url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
+ 'file': 'LYV6doKo7f.mp4',
+ 'md5': '03af18b73a07b4088753930db7a34add',
+ 'info_dict': {
+ "title": "Luati-le Banii sez 4 ep 1",
+ "description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
}
}
@@ -27,10 +29,10 @@ class Ro220IE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
flashVars_str = self._search_regex(
r'<param name="flashVars" value="([^"]+)"',
- webpage, u'flashVars')
+ webpage, 'flashVars')
flashVars = compat_parse_qs(flashVars_str)
- info = {
+ return {
'_type': 'video',
'id': video_id,
'ext': 'mp4',
@@ -39,4 +41,3 @@ class Ro220IE(InfoExtractor):
'description': clean_html(flashVars['desc'][0]),
'thumbnail': flashVars['preview'][0],
}
- return info
diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py
index 051a34d5b..9156d7faf 100644
--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dl/extractor/spiegel.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
@@ -6,20 +8,20 @@ from .common import InfoExtractor
class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
_TESTS = [{
- u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
- u'file': u'1259285.mp4',
- u'md5': u'2c2754212136f35fb4b19767d242f66e',
- u'info_dict': {
- u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
- }
+ 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
+ 'file': '1259285.mp4',
+ 'md5': '2c2754212136f35fb4b19767d242f66e',
+ 'info_dict': {
+ 'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
+ },
},
{
- u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
- u'file': u'1309159.mp4',
- u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
- u'info_dict': {
- u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
- }
+ 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
+ 'file': '1309159.mp4',
+ 'md5': 'f2cdf638d7aa47654e251e1aee360af1',
+ 'info_dict': {
+ 'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
+ },
}]
def _real_extract(self, url):
@@ -29,17 +31,17 @@ class SpiegelIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(
- r'<div class="module-title">(.*?)</div>', webpage, u'title')
+ r'<div class="module-title">(.*?)</div>', webpage, 'title')
- xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
+ xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
idoc = self._download_xml(
xml_url, video_id,
- note=u'Downloading XML', errnote=u'Failed to download XML')
+ note='Downloading XML', errnote='Failed to download XML')
formats = [
{
'format_id': n.tag.rpartition('type')[2],
- 'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
+ 'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
'width': int(n.find('./width').text),
'height': int(n.find('./height').text),
'abr': int(n.find('./audiobitrate').text),
@@ -55,10 +57,9 @@ class SpiegelIE(InfoExtractor):
self._sort_formats(formats)
- info = {
+ return {
'id': video_id,
'title': video_title,
'duration': duration,
'formats': formats,
}
- return info
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py
index 74c82587f..7fa2b9e15 100644
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import json
import re
@@ -10,48 +12,48 @@ from ..utils import (
class UstreamIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
- IE_NAME = u'ustream'
+ IE_NAME = 'ustream'
_TEST = {
- u'url': u'http://www.ustream.tv/recorded/20274954',
- u'file': u'20274954.flv',
- u'md5': u'088f151799e8f572f84eb62f17d73e5c',
- u'info_dict': {
- u"uploader": u"Young Americans for Liberty",
- u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
- }
+ 'url': 'http://www.ustream.tv/recorded/20274954',
+ 'file': '20274954.flv',
+ 'md5': '088f151799e8f572f84eb62f17d73e5c',
+ 'info_dict': {
+ "uploader": "Young Americans for Liberty",
+ "title": "Young Americans for Liberty February 7, 2012 2:28 AM",
+ },
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('videoID')
- video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
+ video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id)
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
- webpage, u'title')
+ webpage, 'title')
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
- webpage, u'uploader', fatal=False, flags=re.DOTALL)
+ webpage, 'uploader', fatal=False, flags=re.DOTALL)
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
- webpage, u'thumbnail', fatal=False)
-
- info = {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'flv',
- 'title': video_title,
- 'uploader': uploader,
- 'thumbnail': thumbnail,
- }
- return info
+ webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'flv',
+ 'title': video_title,
+ 'uploader': uploader,
+ 'thumbnail': thumbnail,
+ }
+
class UstreamChannelIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
- IE_NAME = u'ustream:channel'
+ IE_NAME = 'ustream:channel'
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index a4b26a26f..f0673972c 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import re
import json
import xml.etree.ElementTree
@@ -22,16 +24,16 @@ class VevoIE(InfoExtractor):
vevo:)
(?P<id>[^&?#]+)'''
_TESTS = [{
- u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
- u'file': u'GB1101300280.mp4',
- u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
- u'info_dict': {
- u"upload_date": u"20130624",
- u"uploader": u"Hurts",
- u"title": u"Somebody to Die For",
- u"duration": 230.12,
- u"width": 1920,
- u"height": 1080,
+ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
+ 'file': 'GB1101300280.mp4',
+ "md5": "06bea460acb744eab74a9d7dcb4bfd61",
+ 'info_dict': {
+ "upload_date": "20130624",
+ "uploader": "Hurts",
+ "title": "Somebody to Die For",
+ "duration": 230.12,
+ "width": 1920,
+ "height": 1080,
}
}]
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
@@ -44,7 +46,7 @@ class VevoIE(InfoExtractor):
if version['version'] > last_version['version']:
last_version = version
if last_version['version'] == -1:
- raise ExtractorError(u'Unable to extract last version of the video')
+ raise ExtractorError('Unable to extract last version of the video')
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
formats = []
@@ -85,7 +87,7 @@ class VevoIE(InfoExtractor):
format_url = self._SMIL_BASE_URL + m.group('path')
formats.append({
'url': format_url,
- 'format_id': u'SMIL_' + m.group('cbr'),
+ 'format_id': 'SMIL_' + m.group('cbr'),
'vcodec': m.group('vcodec'),
'acodec': m.group('acodec'),
'vbr': int(m.group('vbr')),
@@ -101,26 +103,25 @@ class VevoIE(InfoExtractor):
video_id = mobj.group('id')
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
- info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
- video_info = json.loads(info_json)['video']
+ video_info = self._download_json(json_url, video_id)['video']
formats = self._formats_from_json(video_info)
try:
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
self._SMIL_BASE_URL, video_id, video_id.lower())
smil_xml = self._download_webpage(smil_url, video_id,
- u'Downloading SMIL info')
+ 'Downloading SMIL info')
formats.extend(self._formats_from_smil(smil_xml))
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError):
raise
self._downloader.report_warning(
- u'Cannot download SMIL information, falling back to JSON ..')
+ 'Cannot download SMIL information, falling back to JSON ..')
timestamp_ms = int(self._search_regex(
- r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
+ r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
- info = {
+ return {
'id': video_id,
'title': video_info['title'],
'formats': formats,
@@ -129,5 +130,3 @@ class VevoIE(InfoExtractor):
'uploader': video_info['mainArtists'][0]['artistName'],
'duration': video_info['duration'],
}
-
- return info