aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--AUTHORS1
-rw-r--r--test/helper.py2
-rw-r--r--test/test_download.py4
-rw-r--r--youtube_dl/extractor/__init__.py3
-rw-r--r--youtube_dl/extractor/alphaporno.py77
-rw-r--r--youtube_dl/extractor/archiveorg.py52
-rw-r--r--youtube_dl/extractor/arte.py4
-rw-r--r--youtube_dl/extractor/bbccouk.py95
-rw-r--r--youtube_dl/extractor/eroprofile.py45
-rw-r--r--youtube_dl/extractor/gameone.py60
-rw-r--r--youtube_dl/extractor/smotri.py4
-rw-r--r--youtube_dl/extractor/sohu.py96
-rw-r--r--youtube_dl/extractor/sportdeutschland.py3
-rw-r--r--youtube_dl/extractor/sunporno.py18
-rw-r--r--youtube_dl/extractor/teletask.py53
15 files changed, 391 insertions, 126 deletions
diff --git a/AUTHORS b/AUTHORS
index 6ea958fce..bb4d8b4d1 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -95,3 +95,4 @@ Adrian Kretz
Mathias Rav
Petr Kutalek
Will Glynn
+Max Reimann
diff --git a/test/helper.py b/test/helper.py
index 8a820526a..96d58b7c1 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -99,7 +99,7 @@ def gettestcases(include_onlymatching=False):
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
-def expect_info_dict(self, expected_dict, got_dict):
+def expect_info_dict(self, got_dict, expected_dict):
for info_field, expected in expected_dict.items():
if isinstance(expected, compat_str) and expected.startswith('re:'):
got = got_dict.get(info_field)
diff --git a/test/test_download.py b/test/test_download.py
index a009aa475..412f3dbce 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -155,7 +155,7 @@ def generator(test_case):
if is_playlist:
self.assertEqual(res_dict['_type'], 'playlist')
self.assertTrue('entries' in res_dict)
- expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
+ expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
if 'playlist_mincount' in test_case:
assertGreaterEqual(
@@ -204,7 +204,7 @@ def generator(test_case):
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
- expect_info_dict(self, tc.get('info_dict', {}), info_dict)
+ expect_info_dict(self, info_dict, tc.get('info_dict', {}))
finally:
try_rm_tcs_files()
if is_playlist and res_dict is not None and res_dict.get('entries'):
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index ba12e3263..ab0f76862 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -7,6 +7,7 @@ from .adobetv import AdobeTVIE
from .adultswim import AdultSwimIE
from .aftonbladet import AftonbladetIE
from .aljazeera import AlJazeeraIE
+from .alphaporno import AlphaPornoIE
from .anitube import AnitubeIE
from .anysex import AnySexIE
from .aol import AolIE
@@ -109,6 +110,7 @@ from .elpais import ElPaisIE
from .empflix import EMPFlixIE
from .engadget import EngadgetIE
from .eporner import EpornerIE
+from .eroprofile import EroProfileIE
from .escapist import EscapistIE
from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE
@@ -407,6 +409,7 @@ from .ted import TEDIE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telemb import TeleMBIE
+from .teletask import TeleTaskIE
from .tenplay import TenPlayIE
from .testurl import TestURLIE
from .tf1 import TF1IE
diff --git a/youtube_dl/extractor/alphaporno.py b/youtube_dl/extractor/alphaporno.py
new file mode 100644
index 000000000..c34719d1f
--- /dev/null
+++ b/youtube_dl/extractor/alphaporno.py
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ parse_duration,
+ parse_filesize,
+ int_or_none,
+)
+
+
+class AlphaPornoIE(InfoExtractor):
+    """Extractor for single video pages on alphaporno.com.
+
+    The path component matched by ``_VALID_URL`` is used as ``display_id``;
+    every other field is scraped from the downloaded video page.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
+        'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
+        'info_dict': {
+            'id': '258807',
+            'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
+            'ext': 'mp4',
+            'title': 'Sensual striptease porn with Samantha Alexandra',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'timestamp': 1418694611,
+            'upload_date': '20141216',
+            'duration': 387,
+            'filesize_approx': 54120000,
+            'tbr': 1145,
+            'categories': list,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Download the page behind *url* and return its info dict.
+
+        The video URL and the title are looked up without a default, so a
+        page lacking either aborts extraction.  The video id, container
+        extension and categories are looked up with explicit defaults and
+        therefore never abort it.
+        """
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        # The page-embedded id is searched with default=None.  Fall back to
+        # the URL slug so that the returned 'id' is never None.
+        video_id = self._search_regex(
+            r"video_id\s*:\s*'([^']+)'", webpage, 'video id',
+            default=None) or display_id
+
+        video_url = self._search_regex(
+            r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
+        # The meta value carries a leading dot ('.mp4'); [1:] strips it.
+        ext = self._html_search_meta(
+            'encodingFormat', webpage, 'ext', default='.mp4')[1:]
+
+        title = self._search_regex(
+            [r'<meta content="([^"]+)" itemprop="description">',
+             r'class="title" itemprop="name">([^<]+)<'],
+            webpage, 'title')
+        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
+        timestamp = parse_iso8601(self._html_search_meta(
+            'uploadDate', webpage, 'upload date'))
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration'))
+        filesize_approx = parse_filesize(self._html_search_meta(
+            'contentSize', webpage, 'file size'))
+        bitrate = int_or_none(self._html_search_meta(
+            'bitrate', webpage, 'bitrate'))
+
+        keywords = self._html_search_meta(
+            'keywords', webpage, 'categories', default='')
+        # ''.split(',') yields [''], so drop empty entries; also trim the
+        # whitespace that may follow each comma.
+        categories = [
+            keyword.strip() for keyword in keywords.split(',')
+            if keyword.strip()]
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'ext': ext,
+            'title': title,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'filesize_approx': filesize_approx,
+            'tbr': bitrate,
+            'categories': categories,
+            'age_limit': age_limit,
+        }
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
index 34ce8429b..9fc35a42b 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -1,42 +1,48 @@
from __future__ import unicode_literals
-import json
-import re
-
from .common import InfoExtractor
-from ..utils import (
- unified_strdate,
-)
+from ..utils import unified_strdate
class ArchiveOrgIE(InfoExtractor):
IE_NAME = 'archive.org'
IE_DESC = 'archive.org videos'
- _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
- _TEST = {
- "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
- 'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
+ _VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
+ _TESTS = [{
+ 'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
'md5': '8af1d4cf447933ed3c7f4871162602db',
'info_dict': {
- "title": "1968 Demo - FJCC Conference Presentation Reel #1",
- "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
- "upload_date": "19681210",
- "uploader": "SRI International"
+ 'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
+ 'ext': 'ogv',
+ 'title': '1968 Demo - FJCC Conference Presentation Reel #1',
+ 'description': 'md5:1780b464abaca9991d8968c877bb53ed',
+ 'upload_date': '19681210',
+ 'uploader': 'SRI International'
+ }
+ }, {
+ 'url': 'https://archive.org/details/Cops1922',
+ 'md5': '18f2a19e6d89af8425671da1cf3d4e04',
+ 'info_dict': {
+ 'id': 'Cops1922',
+ 'ext': 'ogv',
+ 'title': 'Buster Keaton\'s "Cops" (1922)',
+ 'description': 'md5:70f72ee70882f713d4578725461ffcc3',
}
- }
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
json_url = url + ('?' if '?' in url else '&') + 'output=json'
- json_data = self._download_webpage(json_url, video_id)
- data = json.loads(json_data)
+ data = self._download_json(json_url, video_id)
+
+ def get_optional(data_dict, field):
+ return data_dict['metadata'].get(field, [None])[0]
- title = data['metadata']['title'][0]
- description = data['metadata']['description'][0]
- uploader = data['metadata']['creator'][0]
- upload_date = unified_strdate(data['metadata']['date'][0])
+ title = get_optional(data, 'title')
+ description = get_optional(data, 'description')
+ uploader = get_optional(data, 'creator')
+ upload_date = unified_strdate(get_optional(data, 'date'))
formats = [
{
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 219631b9b..929dd3cc5 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -37,7 +37,7 @@ class ArteTvIE(InfoExtractor):
config_xml_url, video_id, note='Downloading configuration')
formats = [{
- 'forma_id': q.attrib['quality'],
+ 'format_id': q.attrib['quality'],
# The playpath starts at 'mp4:', if we don't manually
# split the url, rtmpdump will incorrectly parse them
'url': q.text.split('mp4:', 1)[0],
@@ -133,7 +133,7 @@ class ArteTVPlus7IE(InfoExtractor):
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
'tbr': int_or_none(f.get('bitrate')),
- 'quality': qfunc(f['quality']),
+ 'quality': qfunc(f.get('quality')),
'source_preference': source_pref,
}
diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py
index 2d2f742ae..f690dc803 100644
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -71,7 +71,20 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
'skip_download': True,
},
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
- },
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
+ 'info_dict': {
+ 'id': 'b04v209v',
+ 'ext': 'flv',
+ 'title': 'Pete Tong, The Essential New Tune Special',
+ 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
+ 'duration': 10800,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }
]
def _extract_asx_playlist(self, connection, programme_id):
@@ -203,6 +216,59 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
return formats, subtitles
+    def _download_playlist(self, playlist_id):
+        """Resolve *playlist_id* to a playable programme and its media.
+
+        The JSON playlist at ``/programmes/<id>/playlist.json`` is tried
+        first.  If an ExtractorError caused by an HTTP 404 is raised while
+        processing it, or it contains no programme item, the legacy XML
+        playlist at ``/iplayer/playlist/<id>`` is used instead.
+
+        Returns the tuple
+        ``(programme_id, title, description, duration, formats, subtitles)``;
+        ``duration`` is None when the playlist item carries no duration.
+
+        Raises ExtractorError when the JSON step fails for any reason other
+        than an HTTP 404, when the legacy playlist reports the episode as
+        unavailable, or when the legacy playlist has no programme item.
+        """
+        try:
+            playlist = self._download_json(
+                'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
+                playlist_id, 'Downloading playlist JSON')
+
+            version = playlist.get('defaultAvailableVersion')
+            if version:
+                smp_config = version['smpConfig']
+                title = smp_config['title']
+                description = smp_config['summary']
+                for item in smp_config['items']:
+                    kind = item['kind']
+                    if kind != 'programme' and kind != 'radioProgramme':
+                        continue
+                    programme_id = item.get('vpid')
+                    duration = item.get('duration')
+                    # int(None) would raise TypeError; keep a missing
+                    # duration as None instead.
+                    if duration is not None:
+                        duration = int(duration)
+                    formats, subtitles = self._download_media_selector(programme_id)
+                    return programme_id, title, description, duration, formats, subtitles
+        except ExtractorError as ee:
+            # Only an HTTP 404 means "use the legacy playlist".  The
+            # parentheses matter: without them `not` applies to isinstance()
+            # alone, which swallows non-404 HTTP errors and reads `.code`
+            # from causes that are not HTTP errors.
+            if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
+                raise
+
+        # fallback to legacy playlist
+        playlist = self._download_xml(
+            'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
+            playlist_id, 'Downloading legacy playlist XML')
+
+        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
+        if no_items is not None:
+            reason = no_items.get('reason')
+            if reason == 'preAvailability':
+                msg = 'Episode %s is not yet available' % playlist_id
+            elif reason == 'postAvailability':
+                msg = 'Episode %s is no longer available' % playlist_id
+            elif reason == 'noMedia':
+                msg = 'Episode %s is not currently available' % playlist_id
+            else:
+                msg = 'Episode %s is not available: %s' % (playlist_id, reason)
+            raise ExtractorError(msg, expected=True)
+
+        # As before, every programme item is processed and the last one
+        # wins; `result` stays None when no item qualifies.
+        result = None
+        for item in self._extract_items(playlist):
+            kind = item.get('kind')
+            if kind != 'programme' and kind != 'radioProgramme':
+                continue
+            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
+            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+            programme_id = item.get('identifier')
+            duration = item.get('duration')
+            if duration is not None:
+                duration = int(duration)
+            formats, subtitles = self._download_media_selector(programme_id)
+            result = (programme_id, title, description, duration, formats, subtitles)
+
+        if result is None:
+            # Previously this surfaced as an UnboundLocalError at `return`.
+            raise ExtractorError(
+                'No programme found in playlist %s' % playlist_id)
+
+        return result
+
+
def _real_extract(self, url):
group_id = self._match_id(url)
@@ -219,32 +285,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
duration = player['duration']
formats, subtitles = self._download_media_selector(programme_id)
else:
- playlist = self._download_xml(
- 'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
- group_id, 'Downloading playlist XML')
-
- no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
- if no_items is not None:
- reason = no_items.get('reason')
- if reason == 'preAvailability':
- msg = 'Episode %s is not yet available' % group_id
- elif reason == 'postAvailability':
- msg = 'Episode %s is no longer available' % group_id
- elif reason == 'noMedia':
- msg = 'Episode %s is not currently available' % group_id
- else:
- msg = 'Episode %s is not available: %s' % (group_id, reason)
- raise ExtractorError(msg, expected=True)
-
- for item in self._extract_items(playlist):
- kind = item.get('kind')
- if kind != 'programme' and kind != 'radioProgramme':
- continue
- title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
- description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
- programme_id = item.get('identifier')
- duration = int(item.get('duration'))
- formats, subtitles = self._download_media_selector(programme_id)
+ programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(programme_id, subtitles)
diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py
new file mode 100644
index 000000000..79e2fbd39
--- /dev/null
+++ b/youtube_dl/extractor/eroprofile.py
@@ -0,0 +1,45 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class EroProfileIE(InfoExtractor):
+    """Extractor for single video pages on eroprofile.com.
+
+    The path component matched by ``_VALID_URL`` is used as ``display_id``;
+    the remaining fields are scraped from the downloaded video page.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
+        'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
+        'info_dict': {
+            'id': '3733775',
+            'display_id': 'sexy-babe-softcore',
+            'ext': 'm4v',
+            'title': 'sexy babe softcore',
+            'thumbnail': 're:https?://.*\.jpg',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Download the page behind *url* and return its info dict.
+
+        The video URL and the title are looked up without a default, so a
+        page lacking either aborts extraction.  The thumbnail is looked up
+        with ``fatal=False`` and the video id with ``default=None``.
+        ``age_limit`` is fixed at 18.
+        """
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        # The numeric id is searched with default=None.  Fall back to the
+        # URL slug so that the returned 'id' is never None.
+        video_id = self._search_regex(
+            [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
+            webpage, 'video id', default=None) or display_id
+
+        video_url = self._search_regex(
+            r'<source src="([^"]+)', webpage, 'video url')
+        title = self._html_search_regex(
+            r'Title:</th><td>([^<]+)</td>', webpage, 'title')
+        thumbnail = self._search_regex(
+            r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
+            webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py
index 3022f539d..75f180928 100644
--- a/youtube_dl/extractor/gameone.py
+++ b/youtube_dl/extractor/gameone.py
@@ -6,7 +6,9 @@ import re
from .common import InfoExtractor
from ..utils import (
xpath_with_ns,
- parse_iso8601
+ parse_iso8601,
+ float_or_none,
+ int_or_none,
)
NAMESPACE_MAP = {
@@ -21,21 +23,38 @@ RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
class GameOneIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://www.gameone.de/tv/288',
- 'md5': '136656b7fb4c9cb4a8e2d500651c499b',
- 'info_dict': {
- 'id': '288',
- 'ext': 'mp4',
- 'title': 'Game One - Folge 288',
- 'duration': 1238,
- 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
- 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
- 'age_limit': 16,
- 'upload_date': '20140513',
- 'timestamp': 1399980122,
+ _TESTS = [
+ {
+ 'url': 'http://www.gameone.de/tv/288',
+ 'md5': '136656b7fb4c9cb4a8e2d500651c499b',
+ 'info_dict': {
+ 'id': '288',
+ 'ext': 'mp4',
+ 'title': 'Game One - Folge 288',
+ 'duration': 1238,
+ 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
+ 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
+ 'age_limit': 16,
+ 'upload_date': '20140513',
+ 'timestamp': 1399980122,
+ }
+ },
+ {
+ 'url': 'http://gameone.de/tv/220',
+ 'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
+ 'info_dict': {
+ 'id': '220',
+ 'ext': 'mp4',
+ 'upload_date': '20120918',
+ 'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
+ 'timestamp': 1347971451,
+ 'title': 'Game One - Folge 220',
+ 'duration': 896.62,
+ 'age_limit': 16,
+ }
}
- }
+
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -66,13 +85,13 @@ class GameOneIE(InfoExtractor):
video_id,
'Downloading media:content')
rendition_items = content.findall('.//rendition')
- duration = int(rendition_items[0].get('duration'))
+ duration = float_or_none(rendition_items[0].get('duration'))
formats = [
{
'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
- 'width': int(r.get('width')),
- 'height': int(r.get('height')),
- 'tbr': int(r.get('bitrate')),
+ 'width': int_or_none(r.get('width')),
+ 'height': int_or_none(r.get('height')),
+ 'tbr': int_or_none(r.get('bitrate')),
}
for r in rendition_items
]
@@ -105,7 +124,8 @@ class GameOnePlaylistIE(InfoExtractor):
webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
entries = [
- self.url_result('http://www.gameone.de/tv/%d' % video_id, 'GameOne')
+ self.url_result('http://www.gameone.de/tv/%d' %
+ video_id, 'GameOne')
for video_id in range(max_id, 0, -1)]
return {
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py
index d031fe401..baef3daa0 100644
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -69,6 +69,7 @@ class SmotriIE(InfoExtractor):
'params': {
'videopassword': 'qwerty',
},
+ 'skip': 'Video is not approved by moderator',
},
# age limit + video-password
{
@@ -86,7 +87,8 @@ class SmotriIE(InfoExtractor):
},
'params': {
'videopassword': '333'
- }
+ },
+ 'skip': 'Video is not approved by moderator',
},
# swf player
{
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index 07f514a46..c04791997 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -1,11 +1,10 @@
# encoding: utf-8
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
+from .common import compat_str
class SohuIE(InfoExtractor):
@@ -29,60 +28,73 @@ class SohuIE(InfoExtractor):
base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
- data_url = base_data_url + str(vid_id)
- data_json = self._download_webpage(
- data_url, video_id,
- note='Downloading JSON data for ' + str(vid_id))
- return json.loads(data_json)
+
+ return self._download_json(
+ base_data_url + vid_id, video_id,
+ 'Downloading JSON data for %s' % vid_id)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
mytv = mobj.group('mytv') is not None
webpage = self._download_webpage(url, video_id)
- raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
- webpage, 'video title')
+ raw_title = self._html_search_regex(
+ r'(?s)<title>(.+?)</title>',
+ webpage, 'video title')
title = raw_title.partition('-')[0].strip()
- vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
- 'video path')
- data = _fetch_data(vid, mytv)
-
- QUALITIES = ('ori', 'super', 'high', 'nor')
- vid_ids = [data['data'][q + 'Vid']
- for q in QUALITIES
- if data['data'][q + 'Vid'] != 0]
- if not vid_ids:
- raise ExtractorError('No formats available for this video')
+ vid = self._html_search_regex(
+ r'var vid ?= ?["\'](\d+)["\']',
+ webpage, 'video path')
+ vid_data = _fetch_data(vid, mytv)
- # For now, we just pick the highest available quality
- vid_id = vid_ids[-1]
+ formats_json = {}
+ for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
+ vid_id = vid_data['data'].get('%sVid' % format_id)
+ if not vid_id:
+ continue
+ vid_id = compat_str(vid_id)
+ formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv)
- format_data = data if vid == vid_id else _fetch_data(vid_id, mytv)
- part_count = format_data['data']['totalBlocks']
- allot = format_data['allot']
- prot = format_data['prot']
- clipsURL = format_data['data']['clipsURL']
- su = format_data['data']['su']
+ part_count = vid_data['data']['totalBlocks']
playlist = []
for i in range(part_count):
- part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
- (allot, prot, clipsURL[i], su[i]))
- part_str = self._download_webpage(
- part_url, video_id,
- note='Downloading part %d of %d' % (i + 1, part_count))
-
- part_info = part_str.split('|')
- video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
-
- video_info = {
- 'id': '%s_part%02d' % (video_id, i + 1),
+ formats = []
+ for format_id, format_data in formats_json.items():
+ allot = format_data['allot']
+ prot = format_data['prot']
+
+ data = format_data['data']
+ clips_url = data['clipsURL']
+ su = data['su']
+
+ part_str = self._download_webpage(
+ 'http://%s/?prot=%s&file=%s&new=%s' %
+ (allot, prot, clips_url[i], su[i]),
+ video_id,
+ 'Downloading %s video URL part %d of %d'
+ % (format_id, i + 1, part_count))
+
+ part_info = part_str.split('|')
+ video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'filesize': data['clipsBytes'][i],
+ 'width': data['width'],
+ 'height': data['height'],
+ 'fps': data['fps'],
+ })
+ self._sort_formats(formats)
+
+ playlist.append({
+ 'id': '%s_part%d' % (video_id, i + 1),
'title': title,
- 'url': video_url,
- 'ext': 'mp4',
- }
- playlist.append(video_info)
+ 'duration': vid_data['data']['clipsDuration'][i],
+ 'formats': formats,
+ })
if len(playlist) == 1:
info = playlist[0]
diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py
index 2f57f5b7c..1a57aebf1 100644
--- a/youtube_dl/extractor/sportdeutschland.py
+++ b/youtube_dl/extractor/sportdeutschland.py
@@ -60,9 +60,10 @@ class SportDeutschlandIE(InfoExtractor):
categories = list(data.get('section', {}).get('tags', {}).values())
asset = data['asset']
+ assets_info = self._download_json(asset['url'], video_id)
formats = []
- smil_url = asset['video']
+ smil_url = assets_info['video']
if '.smil' in smil_url:
m3u8_url = smil_url.replace('.smil', '.m3u8')
formats.extend(
diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py
index 263f09b46..8a333f1d2 100644
--- a/youtube_dl/extractor/sunporno.py
+++ b/youtube_dl/extractor/sunporno.py
@@ -28,23 +28,27 @@ class SunPornoIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
- description = self._html_search_meta('description', webpage, 'description')
+ title = self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title')
+ description = self._html_search_meta(
+ 'description', webpage, 'description')
thumbnail = self._html_search_regex(
r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
duration = parse_duration(self._search_regex(
- r'Duration:\s*(\d+:\d+)\s*<', webpage, 'duration', fatal=False))
+ r'itemprop="duration">\s*(\d+:\d+)\s*<',
+ webpage, 'duration', fatal=False))
view_count = int_or_none(self._html_search_regex(
- r'class="views">\s*(\d+)\s*<', webpage, 'view count', fatal=False))
+ r'class="views">\s*(\d+)\s*<',
+ webpage, 'view count', fatal=False))
comment_count = int_or_none(self._html_search_regex(
- r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))
+ r'(\d+)</b> Comments?',
+ webpage, 'comment count', fatal=False))
formats = []
quality = qualities(['mp4', 'flv'])
diff --git a/youtube_dl/extractor/teletask.py b/youtube_dl/extractor/teletask.py
new file mode 100644
index 000000000..e54145105
--- /dev/null
+++ b/youtube_dl/extractor/teletask.py
@@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class TeleTaskIE(InfoExtractor):
+    """Extractor for tele-task.de HTML5 lecture pages.
+
+    Every ``<video class="...">`` / ``<source src="...">`` pair found on
+    the page becomes one entry of the returned playlist.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.tele-task.de/archive/video/html5/26168/',
+        'info_dict': {
+            'title': 'Duplicate Detection',
+        },
+        'playlist': [{
+            'md5': '290ef69fb2792e481169c3958dbfbd57',
+            'info_dict': {
+                'id': '26168-speaker',
+                'ext': 'mp4',
+                'title': 'Duplicate Detection',
+                'upload_date': '20141218',
+            }
+        }, {
+            'md5': 'e1e7218c5f0e4790015a437fcf6c71b4',
+            'info_dict': {
+                'id': '26168-slides',
+                'ext': 'mp4',
+                'title': 'Duplicate Detection',
+                'upload_date': '20141218',
+            }
+        }]
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist with one entry per video stream on the page.
+
+        Entry ids have the form ``<lecture id>-<video class>``; all entries
+        share the lecture title and upload date.
+        """
+        lecture_id = self._match_id(url)
+        page = self._download_webpage(url, lecture_id)
+
+        lecture_title = self._html_search_regex(
+            r'itemprop="name">([^<]+)</a>', page, 'title')
+        # The date lookup is non-fatal; its result goes straight to
+        # unified_strdate().
+        raw_date = self._html_search_regex(
+            r'Date:</td><td>([^<]+)</td>', page, 'date', fatal=False)
+        lecture_date = unified_strdate(raw_date)
+
+        # Each match is a (video class, source URL) pair.
+        streams = re.findall(
+            r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', page)
+
+        entries = []
+        for stream_name, stream_url in streams:
+            entries.append({
+                'id': '%s-%s' % (lecture_id, stream_name),
+                'url': stream_url,
+                'title': lecture_title,
+                'upload_date': lecture_date,
+            })
+
+        return self.playlist_result(entries, lecture_id, lecture_title)