about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/__init__.py 2
-rw-r--r-- youtube_dl/extractor/naver.py 34
-rw-r--r-- youtube_dl/extractor/tagesschau.py 79
-rw-r--r-- youtube_dl/extractor/teachingchannel.py 33
-rw-r--r-- youtube_dl/extractor/xvideos.py 17
-rw-r--r-- youtube_dl/extractor/youtube.py 7
6 files changed, 150 insertions, 22 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 2ad1db555..a19e85543 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -266,10 +266,12 @@ from .streamcz import StreamCZIE
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE
+from .tagesschau import TagesschauIE
from .teachertube import (
TeacherTubeIE,
TeacherTubeClassroomIE,
)
+from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index 4cab30631..c0231c197 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -1,4 +1,6 @@
# encoding: utf-8
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
@@ -12,12 +14,13 @@ class NaverIE(InfoExtractor):
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
_TEST = {
- u'url': u'http://tvcast.naver.com/v/81652',
- u'file': u'81652.mp4',
- u'info_dict': {
- u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
- u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
- u'upload_date': u'20130903',
+ 'url': 'http://tvcast.naver.com/v/81652',
+ 'info_dict': {
+ 'id': '81652',
+ 'ext': 'mp4',
+ 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
+ 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
+ 'upload_date': '20130903',
},
}
@@ -28,7 +31,7 @@ class NaverIE(InfoExtractor):
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
webpage)
if m_id is None:
- raise ExtractorError(u'couldn\'t extract vid and key')
+ raise ExtractorError('couldn\'t extract vid and key')
vid = m_id.group(1)
key = m_id.group(2)
query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
@@ -39,22 +42,27 @@ class NaverIE(InfoExtractor):
})
info = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
- video_id, u'Downloading video info')
+ video_id, 'Downloading video info')
urls = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
- video_id, u'Downloading video formats info')
+ video_id, 'Downloading video formats info')
formats = []
for format_el in urls.findall('EncodingOptions/EncodingOption'):
domain = format_el.find('Domain').text
- if domain.startswith('rtmp'):
- continue
- formats.append({
+ f = {
'url': domain + format_el.find('uri').text,
'ext': 'mp4',
'width': int(format_el.find('width').text),
'height': int(format_el.find('height').text),
- })
+ }
+ if domain.startswith('rtmp'):
+ f.update({
+ 'ext': 'flv',
+ 'rtmp_protocol': '1', # rtmpt
+ })
+ formats.append(f)
+ self._sort_formats(formats)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
new file mode 100644
index 000000000..36331529e
--- /dev/null
+++ b/youtube_dl/extractor/tagesschau.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class TagesschauIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
+ 'md5': 'bcdeac2194fb296d599ce7929dfa4009',
+ 'info_dict': {
+ 'id': '1399128',
+ 'ext': 'mp4',
+ 'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen',
+ 'description': 'md5:69da3c61275b426426d711bde96463ab',
+ 'thumbnail': 're:^http:.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/video/video-196.html',
+ 'md5': '8aaa8bf3ae1ca2652309718c03019128',
+ 'info_dict': {
+ 'id': '196',
+ 'ext': 'mp4',
+ 'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt',
+ 'description': 'md5:f22e4af75821d174fa6c977349682691',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
+ }]
+
+ _FORMATS = {
+ 's': {'width': 256, 'height': 144, 'quality': 1},
+ 'm': {'width': 512, 'height': 288, 'quality': 2},
+ 'l': {'width': 960, 'height': 544, 'quality': 3},
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ if video_id.startswith('-'):
+ display_id = video_id.strip('-')
+ else:
+ display_id = video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ playerpage = self._download_webpage(
+ 'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
+ display_id, 'Downloading player page')
+
+ medias = re.findall(
+ r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
+ playerpage)
+
+ formats = []
+ for url, ext, res in medias:
+ f = {
+ 'format_id': res + '_' + ext,
+ 'url': url,
+ 'ext': ext,
+ }
+ f.update(self._FORMATS.get(res, {}))
+ formats.append(f)
+
+ self._sort_formats(formats)
+
+ thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+
+ return {
+ 'id': display_id,
+ 'title': self._og_search_title(webpage).strip(),
+ 'thumbnail': 'http://www.tagesschau.de' + thumbnail,
+ 'formats': formats,
+ 'description': self._og_search_description(webpage).strip(),
+ }
diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py
new file mode 100644
index 000000000..117afa9bf
--- /dev/null
+++ b/youtube_dl/extractor/teachingchannel.py
@@ -0,0 +1,33 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+
+
+class TeachingChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)'
+
+ _TEST = {
+ 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
+ 'info_dict': {
+ 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
+ 'ext': 'mp4',
+ 'title': 'A History of Teaming',
+ 'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ title = mobj.group('title')
+ webpage = self._download_webpage(url, title)
+ ooyala_code = self._search_regex(
+ r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code')
+
+ return OoyalaIE._build_url_result(ooyala_code)
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index 85e99e1b0..7e0044824 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -5,18 +5,21 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
+ ExtractorError,
+ clean_html,
)
class XVideosIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
_TEST = {
- 'url': 'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
- 'file': '939581.flv',
- 'md5': '1d0c835822f0a71a7bf011855db929d0',
+ 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
+ 'md5': '4b46ae6ea5e6e9086e714d883313c0c9',
'info_dict': {
- "title": "Funny Porns By >>>>S<<<<<< -1",
- "age_limit": 18,
+ 'id': '4588838',
+ 'ext': 'flv',
+ 'title': 'Biker Takes his Girl',
+ 'age_limit': 18,
}
}
@@ -28,6 +31,10 @@ class XVideosIE(InfoExtractor):
self.report_extraction(video_id)
+ mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
+ if mobj:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
+
# Extract video URL
video_url = compat_urllib_parse.unquote(
self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 8327fb146..7c50881c4 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -223,6 +223,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
@@ -1414,11 +1415,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
title_span = (search_title('playlist-title') or
search_title('title long-title') or search_title('title'))
title = clean_html(title_span)
- video_re = r'''(?x)data-video-username="(.*?)".*?
+ video_re = r'''(?x)data-video-username=".*?".*?
href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
- matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
- # Some of the videos may have been deleted, their username field is empty
- ids = [video_id for (username, video_id) in matches if username]
+ ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, title)