diff options
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/empflix.py | 22 | ||||
-rw-r--r-- | youtube_dl/extractor/gamekings.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/pornhub.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/streamcz.py | 22 | ||||
-rw-r--r-- | youtube_dl/extractor/swrmediathek.py | 104 |
6 files changed, 139 insertions, 14 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3503c76b7..4a4eb4b74 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -260,6 +260,7 @@ from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE +from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE from .sztvhu import SztvHuIE from .teamcoco import TeamcocoIE diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py index eaeee5a51..e6952588f 100644 --- a/youtube_dl/extractor/empflix.py +++ b/youtube_dl/extractor/empflix.py @@ -3,20 +3,18 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) class EmpflixIE(InfoExtractor): _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html' _TEST = { 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', - 'md5': '5e5cc160f38ca9857f318eb97146e13e', + 'md5': 'b1bc15b6412d33902d6e5952035fcabc', 'info_dict': { 'id': '33051', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Amateur Finger Fuck', + 'description': 'Amateur solo finger fucking.', 'age_limit': 18, } } @@ -30,6 +28,8 @@ class EmpflixIE(InfoExtractor): video_title = self._html_search_regex( r'name="title" value="(?P<title>[^"]*)"', webpage, 'title') + video_description = self._html_search_regex( + r'name="description" value="([^"]*)"', webpage, 'description', fatal=False) cfg_url = self._html_search_regex( r'flashvars\.config = escape\("([^"]+)"', @@ -37,12 +37,18 @@ class EmpflixIE(InfoExtractor): cfg_xml = self._download_xml( cfg_url, video_id, note='Downloading metadata') - video_url = cfg_xml.find('videoLink').text + + formats = [ + { + 'url': item.find('videoLink').text, + 'format_id': item.find('res').text, + } for item in cfg_xml.findall('./quality/item') + ] return { 'id': video_id, - 'url': video_url, - 'ext': 'flv', 'title': video_title, + 'description': video_description, + 'formats': formats, 'age_limit': age_limit, } diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index 233398966..11fee3d31 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -15,7 +15,7 @@ class GamekingsIE(InfoExtractor): 'id': '20130811', 'ext': 'mp4', 'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', - 'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4', + 'description': 'md5:36fd701e57e8c15ac8682a2374c99731', } } diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 7dd3dca0d..4118ee956 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -45,7 +45,7 @@ class PornHubIE(InfoExtractor): video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') video_uploader = self._html_search_regex( - r'(?s)<div class="video-info-row">\s*From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<', + r'(?s)From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<', webpage, 'uploader', fatal=False) thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) if thumbnail: diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py index 7362904db..73efe9542 100644 --- a/youtube_dl/extractor/streamcz.py +++ b/youtube_dl/extractor/streamcz.py @@ -5,13 +5,16 @@ import re import json from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + compat_str, +) class StreamCZIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti', 'md5': '6d3ca61a8d0633c9c542b92fcb936b0c', 'info_dict': { @@ -22,7 +25,18 @@ class StreamCZIE(InfoExtractor): 'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100', 'duration': 256, }, - } + }, { + 'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka', + 'md5': '246272e753e26bbace7fcd9deca0650c', + 'info_dict': { + 'id': '10002447', + 'ext': 'mp4', + 'title': 'Kancelář Blaník: Tři roky pro Mazánka', + 'description': 'md5:9177695a8b756a0a8ab160de4043b392', + 'thumbnail': 'http://im.stream.cz/episode/537f838c50c11f8d21320000', + 'duration': 368, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -57,7 +71,7 @@ class StreamCZIE(InfoExtractor): self._sort_formats(formats) return { - 'id': str(jsonData['id']), + 'id': compat_str(jsonData['episode_id']), 'title': self._og_search_title(webpage), 'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'), 'formats': formats, diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py new file mode 100644 index 000000000..6c688c520 --- /dev/null +++ b/youtube_dl/extractor/swrmediathek.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import parse_duration + + +class SWRMediathekIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + + _TESTS = [{ + 'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6', + 'md5': '8c5f6f0172753368547ca8413a7768ac', + 'info_dict': { + 'id': '849790d0-dab8-11e3-a953-0026b975f2e6', + 'ext': 'mp4', + 'title': 'SWR odysso', + 'description': 'md5:2012e31baad36162e97ce9eb3f157b8a', + 'thumbnail': 're:^http:.*\.jpg$', + 'duration': 2602, + 'upload_date': '20140515', + 'uploader': 'SWR Fernsehen', + 'uploader_id': '990030', + }, + }, { + 'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6', + 'md5': 'b10ab854f912eecc5a6b55cd6fc1f545', + 'info_dict': { + 'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6', + 'ext': 'mp4', + 'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen', + 'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2', + 'thumbnail': 're:http://.*\.jpg', + 'duration': 5305, + 'upload_date': '20140516', + 'uploader': 'SWR Fernsehen', + 'uploader_id': '990030', + }, + }, { + 'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6', + 'md5': '4382e4ef2c9d7ce6852535fa867a0dd3', + 'info_dict': { + 'id': 'bba23e10-cb93-11e3-bf7f-0026b975f2e6', + 'ext': 'mp3', + 'title': 'Saša Stanišic: Vor dem Fest', + 'description': 'md5:5b792387dc3fbb171eb709060654e8c9', + 'thumbnail': 're:http://.*\.jpg', + 'duration': 3366, + 'upload_date': '20140520', + 'uploader': 'SWR 2', + 'uploader_id': '284670', + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + video = self._download_json( + 'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id, video_id, 'Downloading video JSON') + + attr = video['attr'] + media_type = attr['entry_etype'] + + formats = [] + for entry in video['sub']: + if entry['name'] != 'entry_media': + continue + + entry_attr = entry['attr'] + codec = entry_attr['val0'] + quality = int(entry_attr['val1']) + + fmt = { + 'url': entry_attr['val2'], + 'quality': quality, + } + + if media_type == 'Video': + fmt.update({ + 'format_note': ['144p', '288p', '544p'][quality-1], + 'vcodec': codec, + }) + elif media_type == 'Audio': + fmt.update({ + 'acodec': codec, + }) + formats.append(fmt) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': attr['entry_title'], + 'description': attr['entry_descl'], + 'thumbnail': attr['entry_image_16_9'], + 'duration': parse_duration(attr['entry_durat']), + 'upload_date': attr['entry_pdatet'][:-4], + 'uploader': attr['channel_title'], + 'uploader_id': attr['channel_idkey'], + 'formats': formats, + }
\ No newline at end of file |