aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/__init__.py6
-rw-r--r--youtube_dl/extractor/ard.py33
-rw-r--r--youtube_dl/extractor/comedycentral.py2
-rw-r--r--youtube_dl/extractor/extremetube.py2
-rw-r--r--youtube_dl/extractor/fc2.py4
-rw-r--r--youtube_dl/extractor/ivi.py12
-rw-r--r--youtube_dl/extractor/mailru.py55
-rw-r--r--youtube_dl/extractor/naver.py34
-rw-r--r--youtube_dl/extractor/nrk.py2
-rw-r--r--youtube_dl/extractor/tagesschau.py79
-rw-r--r--youtube_dl/extractor/teachertube.py85
-rw-r--r--youtube_dl/extractor/teachingchannel.py33
-rw-r--r--youtube_dl/extractor/theplatform.py25
-rw-r--r--youtube_dl/extractor/vevo.py2
-rw-r--r--youtube_dl/extractor/xvideos.py17
-rw-r--r--youtube_dl/extractor/yahoo.py18
-rw-r--r--youtube_dl/extractor/youtube.py9
17 files changed, 327 insertions, 91 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index b689dc3c9..72523c54d 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -267,6 +267,12 @@ from .streamcz import StreamCZIE
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE
+from .tagesschau import TagesschauIE
+from .teachertube import (
+ TeacherTubeIE,
+ TeacherTubeClassroomIE,
+)
+from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index b88f71bc4..c6d22c029 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -38,15 +38,19 @@ class ARDIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
- r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title')
+ [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
+ r'<meta name="dcterms.title" content="(.*?)"/>',
+ r'<h4 class="headline">(.*?)</h4>'],
+ webpage, 'title')
description = self._html_search_meta(
'dcterms.abstract', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage)
- streams = [
- mo.groupdict()
- for mo in re.finditer(
- r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)]
+
+ media_info = self._download_json(
+ 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
+ # The second element of the _mediaArray contains the standard http urls
+ streams = media_info['_mediaArray'][1]['_mediaStreamArray']
if not streams:
if '"fsk"' in webpage:
raise ExtractorError('This video is only available after 20:00')
@@ -54,21 +58,12 @@ class ARDIE(InfoExtractor):
formats = []
for s in streams:
format = {
- 'quality': int(s['quality']),
+ 'quality': s['_quality'],
+ 'url': s['_stream'],
}
- if s.get('rtmp_url'):
- format['protocol'] = 'rtmp'
- format['url'] = s['rtmp_url']
- format['playpath'] = s['video_url']
- else:
- format['url'] = s['video_url']
-
- quality_name = self._search_regex(
- r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'],
- 'quality name', default='NA')
- format['format_id'] = '%s-%s-%s-%s' % (
- determine_ext(format['url']), quality_name, s['media_type'],
- s['quality'])
+
+ format['format_id'] = '%s-%s' % (
+ determine_ext(format['url']), format['quality'])
formats.append(format)
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index 6e3a316c6..ba4d73ab8 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -188,7 +188,7 @@ class ComedyCentralShowsIE(InfoExtractor):
})
formats.append({
'format_id': 'rtmp-%s' % format,
- 'url': rtmp_video_url,
+ 'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
'ext': self._video_extensions.get(format, 'mp4'),
'height': h,
'width': w,
diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py
index ff7c0cd3e..14a196ffc 100644
--- a/youtube_dl/extractor/extremetube.py
+++ b/youtube_dl/extractor/extremetube.py
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(
- r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title')
+ r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
uploader = self._html_search_regex(
r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
fatal=False)
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py
index ca8993241..18f91efac 100644
--- a/youtube_dl/extractor/fc2.py
+++ b/youtube_dl/extractor/fc2.py
@@ -13,7 +13,7 @@ from ..utils import (
class FC2IE(InfoExtractor):
- _VALID_URL = r'^http://video\.fc2\.com/(?P<lang>[^/]+)/content/(?P<id>[^/]+)'
+ _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)'
IE_NAME = 'fc2'
_TEST = {
'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
@@ -36,7 +36,7 @@ class FC2IE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage)
refer = url.replace('/content/', '/a/content/')
- mimi = hashlib.md5(video_id + '_gGddgPfeaf_gzyr').hexdigest()
+ mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
info_url = (
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py
index 1ba4966c7..528be1524 100644
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -33,14 +33,14 @@ class IviIE(InfoExtractor):
},
# Serial's serie
{
- 'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
- 'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
+ 'md5': '221f56b35e3ed815fde2df71032f4b3e',
'info_dict': {
- 'id': '74791',
+ 'id': '9549',
'ext': 'mp4',
- 'title': 'Дежурный ангел - 1 серия',
- 'duration': 2490,
- 'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
+ 'title': 'Двое из ларца - Серия 1',
+ 'duration': 2655,
+ 'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
},
'skip': 'Only works from Russia',
}
diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py
index 5016989cc..7460d81cd 100644
--- a/youtube_dl/extractor/mailru.py
+++ b/youtube_dl/extractor/mailru.py
@@ -9,29 +9,48 @@ from .common import InfoExtractor
class MailRuIE(InfoExtractor):
IE_NAME = 'mailru'
IE_DESC = 'Видео@Mail.Ru'
- _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)'
+ _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
- _TEST = {
- 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
- 'md5': 'dea205f03120046894db4ebb6159879a',
- 'info_dict': {
- 'id': '46301138',
- 'ext': 'mp4',
- 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
- 'timestamp': 1393232740,
- 'upload_date': '20140224',
- 'uploader': 'sonypicturesrus',
- 'uploader_id': 'sonypicturesrus@mail.ru',
- 'duration': 184,
- }
- }
+ _TESTS = [
+ {
+ 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
+ 'md5': 'dea205f03120046894db4ebb6159879a',
+ 'info_dict': {
+ 'id': '46301138',
+ 'ext': 'mp4',
+ 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
+ 'timestamp': 1393232740,
+ 'upload_date': '20140224',
+ 'uploader': 'sonypicturesrus',
+ 'uploader_id': 'sonypicturesrus@mail.ru',
+ 'duration': 184,
+ },
+ },
+ {
+ 'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
+ 'md5': '00a91a58c3402204dcced523777b475f',
+ 'info_dict': {
+ 'id': '46843144',
+ 'ext': 'mp4',
+ 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
+ 'timestamp': 1397217632,
+ 'upload_date': '20140411',
+ 'uploader': 'hitech',
+ 'uploader_id': 'hitech@corp.mail.ru',
+ 'duration': 245,
+ },
+ },
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = mobj.group('idv1')
+
+ if not video_id:
+ video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
video_data = self._download_json(
- 'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
+ 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
author = video_data['author']
uploader = author['name']
@@ -40,6 +59,8 @@ class MailRuIE(InfoExtractor):
movie = video_data['movie']
content_id = str(movie['contentId'])
title = movie['title']
+ if title.endswith('.mp4'):
+ title = title[:-4]
thumbnail = movie['poster']
duration = movie['duration']
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index 4cab30631..c0231c197 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -1,4 +1,6 @@
# encoding: utf-8
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
@@ -12,12 +14,13 @@ class NaverIE(InfoExtractor):
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
_TEST = {
- u'url': u'http://tvcast.naver.com/v/81652',
- u'file': u'81652.mp4',
- u'info_dict': {
- u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
- u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
- u'upload_date': u'20130903',
+ 'url': 'http://tvcast.naver.com/v/81652',
+ 'info_dict': {
+ 'id': '81652',
+ 'ext': 'mp4',
+ 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
+ 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
+ 'upload_date': '20130903',
},
}
@@ -28,7 +31,7 @@ class NaverIE(InfoExtractor):
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
webpage)
if m_id is None:
- raise ExtractorError(u'couldn\'t extract vid and key')
+ raise ExtractorError('couldn\'t extract vid and key')
vid = m_id.group(1)
key = m_id.group(2)
query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
@@ -39,22 +42,27 @@ class NaverIE(InfoExtractor):
})
info = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
- video_id, u'Downloading video info')
+ video_id, 'Downloading video info')
urls = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
- video_id, u'Downloading video formats info')
+ video_id, 'Downloading video formats info')
formats = []
for format_el in urls.findall('EncodingOptions/EncodingOption'):
domain = format_el.find('Domain').text
- if domain.startswith('rtmp'):
- continue
- formats.append({
+ f = {
'url': domain + format_el.find('uri').text,
'ext': 'mp4',
'width': int(format_el.find('width').text),
'height': int(format_el.find('height').text),
- })
+ }
+ if domain.startswith('rtmp'):
+ f.update({
+ 'ext': 'flv',
+ 'rtmp_protocol': '1', # rtmpt
+ })
+ formats.append(f)
+ self._sort_formats(formats)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index f5117d7b3..3a6a7883e 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -72,7 +72,7 @@ class NRKIE(InfoExtractor):
class NRKTVIE(InfoExtractor):
- _VALID_URL = r'http://tv\.nrk\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})'
+ _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})'
_TESTS = [
{
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
new file mode 100644
index 000000000..36331529e
--- /dev/null
+++ b/youtube_dl/extractor/tagesschau.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class TagesschauIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
+ 'md5': 'bcdeac2194fb296d599ce7929dfa4009',
+ 'info_dict': {
+ 'id': '1399128',
+ 'ext': 'mp4',
+ 'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen',
+ 'description': 'md5:69da3c61275b426426d711bde96463ab',
+ 'thumbnail': 're:^http:.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/video/video-196.html',
+ 'md5': '8aaa8bf3ae1ca2652309718c03019128',
+ 'info_dict': {
+ 'id': '196',
+ 'ext': 'mp4',
+ 'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt',
+ 'description': 'md5:f22e4af75821d174fa6c977349682691',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
+ }]
+
+ _FORMATS = {
+ 's': {'width': 256, 'height': 144, 'quality': 1},
+ 'm': {'width': 512, 'height': 288, 'quality': 2},
+ 'l': {'width': 960, 'height': 544, 'quality': 3},
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ if video_id.startswith('-'):
+ display_id = video_id.strip('-')
+ else:
+ display_id = video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ playerpage = self._download_webpage(
+ 'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
+ display_id, 'Downloading player page')
+
+ medias = re.findall(
+ r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
+ playerpage)
+
+ formats = []
+ for url, ext, res in medias:
+ f = {
+ 'format_id': res + '_' + ext,
+ 'url': url,
+ 'ext': ext,
+ }
+ f.update(self._FORMATS.get(res, {}))
+ formats.append(f)
+
+ self._sort_formats(formats)
+
+ thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+
+ return {
+ 'id': display_id,
+ 'title': self._og_search_title(webpage).strip(),
+ 'thumbnail': 'http://www.tagesschau.de' + thumbnail,
+ 'formats': formats,
+ 'description': self._og_search_description(webpage).strip(),
+ }
diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py
new file mode 100644
index 000000000..4740f3d56
--- /dev/null
+++ b/youtube_dl/extractor/teachertube.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class TeacherTubeIE(InfoExtractor):
+ IE_NAME = 'teachertube'
+ IE_DESC = 'teachertube.com videos'
+
+ _VALID_URL = r'https?://(?:www\.)?teachertube\.com/viewVideo\.php\?video_id=(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
+ 'md5': 'f9434ef992fd65936d72999951ee254c',
+ 'info_dict': {
+ 'id': '339997',
+ 'ext': 'mp4',
+ 'title': 'Measures of dispersion from a frequency table_x264',
+ 'description': 'md5:a3e9853487185e9fcd7181a07164650b',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064',
+ 'md5': '0d625ec6bc9bf50f70170942ad580676',
+ 'info_dict': {
+ 'id': '340064',
+ 'ext': 'mp4',
+ 'title': 'How to Make Paper Dolls _ Paper Art Projects',
+ 'description': 'md5:2ca52b20cd727773d1dc418b3d6bd07b',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ url = self._html_search_meta('twitter:player:stream', webpage, 'twitter player')
+
+ formats = [{
+ 'format_id': 'flv',
+ 'url': url.replace('mp4v', 'flv').replace('.mp4', '.flv'),
+ 'quality': 0,
+ 'ext': 'flv',
+ }, {
+ 'format_id': 'mp4',
+ 'url': url,
+ 'quality': 1,
+ 'ext': 'mp4',
+ }]
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ }
+
+
+class TeacherTubeClassroomIE(InfoExtractor):
+ IE_NAME = 'teachertube:classroom'
+ IE_DESC = 'teachertube.com online classrooms'
+
+ _VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P<user>[0-9a-zA-Z]+)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ user_id = mobj.group('user')
+
+ rss = self._download_xml('http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id,
+ user_id, 'Downloading classroom RSS')
+
+ entries = []
+ for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'):
+ entries.append(self.url_result(url.attrib['url'], 'TeacherTube'))
+
+ return self.playlist_result(entries, user_id)
diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py
new file mode 100644
index 000000000..117afa9bf
--- /dev/null
+++ b/youtube_dl/extractor/teachingchannel.py
@@ -0,0 +1,33 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+
+
+class TeachingChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)'
+
+ _TEST = {
+ 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
+ 'info_dict': {
+ 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
+ 'ext': 'mp4',
+ 'title': 'A History of Teaming',
+ 'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ title = mobj.group('title')
+ webpage = self._download_webpage(url, title)
+ ooyala_code = self._search_regex(
+ r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code')
+
+ return OoyalaIE._build_url_result(ooyala_code)
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index f15780ef5..b6b2dba9c 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import re
import json
@@ -18,17 +20,17 @@ class ThePlatformIE(InfoExtractor):
_TEST = {
# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
- u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
- u'info_dict': {
- u'id': u'e9I_cZgTgIPd',
- u'ext': u'flv',
- u'title': u'Blackberry\'s big, bold Z30',
- u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
- u'duration': 247,
+ 'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
+ 'info_dict': {
+ 'id': 'e9I_cZgTgIPd',
+ 'ext': 'flv',
+ 'title': 'Blackberry\'s big, bold Z30',
+ 'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
+ 'duration': 247,
},
- u'params': {
+ 'params': {
# rtmp download
- u'skip_download': True,
+ 'skip_download': True,
},
}
@@ -39,7 +41,7 @@ class ThePlatformIE(InfoExtractor):
error_msg = next(
n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref'))
- if n.attrib.get('title') == u'Geographic Restriction')
+ if n.attrib.get('title') == 'Geographic Restriction')
except StopIteration:
pass
else:
@@ -101,8 +103,7 @@ class ThePlatformIE(InfoExtractor):
config_url = url+ '&form=json'
config_url = config_url.replace('swf/', 'config/')
config_url = config_url.replace('onsite/', 'onsite/config/')
- config_json = self._download_webpage(config_url, video_id, u'Downloading config')
- config = json.loads(config_json)
+ config = self._download_json(config_url, video_id, 'Downloading config')
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else:
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index ea34a8f16..eada13ce9 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -16,7 +16,7 @@ class VevoIE(InfoExtractor):
(currently used by MTVIE)
"""
_VALID_URL = r'''(?x)
- (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
+ (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
vevo:)
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index 85e99e1b0..7e0044824 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -5,18 +5,21 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
+ ExtractorError,
+ clean_html,
)
class XVideosIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
_TEST = {
- 'url': 'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
- 'file': '939581.flv',
- 'md5': '1d0c835822f0a71a7bf011855db929d0',
+ 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
+ 'md5': '4b46ae6ea5e6e9086e714d883313c0c9',
'info_dict': {
- "title": "Funny Porns By >>>>S<<<<<< -1",
- "age_limit": 18,
+ 'id': '4588838',
+ 'ext': 'flv',
+ 'title': 'Biker Takes his Girl',
+ 'age_limit': 18,
}
}
@@ -28,6 +31,10 @@ class XVideosIE(InfoExtractor):
self.report_extraction(video_id)
+ mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
+ if mobj:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
+
# Extract video URL
video_url = compat_urllib_parse.unquote(
self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 393f6ffbe..d84be2562 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -21,7 +21,7 @@ class YahooIE(InfoExtractor):
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
'md5': '4962b075c08be8690a922ee026d05e69',
'info_dict': {
- 'id': '214727115',
+ 'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
'ext': 'mp4',
'title': 'Julian Smith & Travis Legg Watch Julian Smith',
'description': 'Julian and Travis watch Julian Smith',
@@ -31,7 +31,7 @@ class YahooIE(InfoExtractor):
'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
'info_dict': {
- 'id': '103000935',
+ 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
'ext': 'mp4',
'title': 'Codefellas - The Cougar Lies with Spanish Moss',
'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
@@ -58,9 +58,11 @@ class YahooIE(InfoExtractor):
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
default=None)
if items_json is None:
- long_id = self._search_regex(
+ CONTENT_ID_REGEXES = [
r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
- webpage, 'content ID')
+ r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"'
+ ]
+ long_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
video_id = long_id
else:
items = json.loads(items_json)
@@ -68,9 +70,9 @@ class YahooIE(InfoExtractor):
# The 'meta' field is not always in the video webpage, we request it
# from another page
long_id = info['id']
- return self._get_info(long_id, video_id)
+ return self._get_info(long_id, video_id, webpage)
- def _get_info(self, long_id, video_id):
+ def _get_info(self, long_id, video_id, webpage):
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
' AND protocol="http"' % long_id)
@@ -113,7 +115,7 @@ class YahooIE(InfoExtractor):
'title': meta['title'],
'formats': formats,
'description': clean_html(meta['description']),
- 'thumbnail': meta['thumbnail'],
+ 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
}
@@ -137,7 +139,7 @@ class YahooNewsIE(YahooIE):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, 'long id')
- return self._get_info(long_id, video_id)
+ return self._get_info(long_id, video_id, webpage)
class YahooSearchIE(SearchInfoExtractor):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 981ca62c0..7c50881c4 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -223,6 +223,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
@@ -1140,7 +1141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
if mobj is None:
mobj = re.search(
- r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded) on (.*?)</strong>',
+ r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
video_webpage)
if mobj is not None:
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
@@ -1414,11 +1415,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
title_span = (search_title('playlist-title') or
search_title('title long-title') or search_title('title'))
title = clean_html(title_span)
- video_re = r'''(?x)data-video-username="(.*?)".*?
+ video_re = r'''(?x)data-video-username=".*?".*?
href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
- matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
- # Some of the videos may have been deleted, their username field is empty
- ids = [video_id for (username, video_id) in matches if username]
+ ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, title)