diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/aftenposten.py | 103 | ||||
-rw-r--r-- | youtube_dl/extractor/aparat.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 34 | ||||
-rw-r--r-- | youtube_dl/extractor/goshgay.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/izlesene.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/rtp.py | 43 | ||||
-rw-r--r-- | youtube_dl/extractor/rts.py | 28 | ||||
-rw-r--r-- | youtube_dl/extractor/soulanime.py | 80 | ||||
-rw-r--r-- | youtube_dl/extractor/teamcoco.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/tvigle.py | 22 | ||||
-rw-r--r-- | youtube_dl/extractor/tweakers.py | 58 |
12 files changed, 257 insertions, 135 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 047f7002a..0d7a120bc 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,6 +6,7 @@ from .academicearth import AcademicEarthCourseIE from .addanime import AddAnimeIE from .adobetv import AdobeTVIE from .adultswim import AdultSwimIE +from .aftenposten import AftenpostenIE from .aftonbladet import AftonbladetIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE diff --git a/youtube_dl/extractor/aftenposten.py b/youtube_dl/extractor/aftenposten.py new file mode 100644 index 000000000..2b257ede7 --- /dev/null +++ b/youtube_dl/extractor/aftenposten.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, + xpath_with_ns, + xpath_text, + find_xpath_attr, +) + + +class AftenpostenIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html' + + _TEST = { + 'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish', + 'md5': 'fd828cd29774a729bf4d4425fe192972', + 'info_dict': { + 'id': '21039', + 'ext': 'mov', + 'title': 'TRAILER: "Sweatshop" - I can´t take any more', + 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238', + 'timestamp': 1416927969, + 'upload_date': '20141125', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._html_search_regex( + r'data-xs-id="(\d+)"', webpage, 'video id') + + data = self._download_xml( + 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id) + + NS_MAP = { + 'atom': 'http://www.w3.org/2005/Atom', + 'xt': 'http://xstream.dk/', + 'media': 
'http://search.yahoo.com/mrss/', + } + + entry = data.find(xpath_with_ns('./atom:entry', NS_MAP)) + + title = xpath_text( + entry, xpath_with_ns('./atom:title', NS_MAP), 'title') + description = xpath_text( + entry, xpath_with_ns('./atom:summary', NS_MAP), 'description') + timestamp = parse_iso8601(xpath_text( + entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date')) + + formats = [] + media_group = entry.find(xpath_with_ns('./media:group', NS_MAP)) + for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)): + media_url = media_content.get('url') + if not media_url: + continue + tbr = int_or_none(media_content.get('bitrate')) + mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url) + if mobj: + formats.append({ + 'url': mobj.group('url'), + 'play_path': 'mp4:%s' % mobj.group('playpath'), + 'app': mobj.group('app'), + 'ext': 'flv', + 'tbr': tbr, + 'format_id': 'rtmp-%d' % tbr, + }) + else: + formats.append({ + 'url': media_url, + 'tbr': tbr, + }) + self._sort_formats(formats) + + link = find_xpath_attr( + entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original') + if link is not None: + formats.append({ + 'url': link.get('href'), + 'format_id': link.get('rel'), + }) + + thumbnails = [{ + 'url': splash.get('url'), + 'width': int_or_none(splash.get('width')), + 'height': int_or_none(splash.get('height')), + } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'formats': formats, + 'thumbnails': thumbnails, + } diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 15006336f..63429780e 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -20,6 +20,7 @@ class AparatIE(InfoExtractor): 'id': 'wP8On', 'ext': 'mp4', 'title': 'تیم گلکسی 11 - زومیت', + 'age_limit': 0, }, # 'skip': 'Extremely unreliable', } @@ -34,7 
+35,8 @@ class AparatIE(InfoExtractor): video_id + '/vt/frame') webpage = self._download_webpage(embed_url, video_id) - video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage) + video_urls = [video_url.replace('\\/', '/') for video_url in re.findall( + r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)] for i, video_url in enumerate(video_urls): req = HEADRequest(video_url) res = self._request_webpage( @@ -46,7 +48,7 @@ class AparatIE(InfoExtractor): title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') thumbnail = self._search_regex( - r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) + r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) return { 'id': video_id, @@ -54,4 +56,5 @@ class AparatIE(InfoExtractor): 'url': video_url, 'ext': 'mp4', 'thumbnail': thumbnail, + 'age_limit': self._family_friendly_search(webpage), } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 602601b24..2f5ba7aee 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -656,6 +656,21 @@ class InfoExtractor(object): } return RATING_TABLE.get(rating.lower(), None) + def _family_friendly_search(self, html): + # See http://schema.org/VideoObj + family_friendly = self._html_search_meta('isFamilyFriendly', html) + + if not family_friendly: + return None + + RATING_TABLE = { + '1': 0, + 'true': 0, + '0': 18, + 'false': 18, + } + return RATING_TABLE.get(family_friendly.lower(), None) + def _twitter_search_player(self, html): return self._html_search_meta('twitter:player', html, 'twitter card player') @@ -707,9 +722,9 @@ class InfoExtractor(object): f.get('quality') if f.get('quality') is not None else -1, f.get('tbr') if f.get('tbr') is not None else -1, f.get('vbr') if f.get('vbr') is not None else -1, - ext_preference, f.get('height') if f.get('height') is not None else -1, f.get('width') if f.get('width') is not None else -1, + ext_preference, f.get('abr') if f.get('abr') 
is not None else -1, audio_ext_preference, f.get('fps') if f.get('fps') is not None else -1, @@ -765,7 +780,7 @@ class InfoExtractor(object): self.to_screen(msg) time.sleep(timeout) - def _extract_f4m_formats(self, manifest_url, video_id): + def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest') @@ -778,26 +793,28 @@ class InfoExtractor(object): media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') for i, media_el in enumerate(media_nodes): if manifest_version == '2.0': - manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href') + manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' + + (media_el.attrib.get('href') or media_el.attrib.get('url'))) tbr = int_or_none(media_el.attrib.get('bitrate')) - format_id = 'f4m-%d' % (i if tbr is None else tbr) formats.append({ - 'format_id': format_id, + 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), 'url': manifest_url, 'ext': 'flv', 'tbr': tbr, 'width': int_or_none(media_el.attrib.get('width')), 'height': int_or_none(media_el.attrib.get('height')), + 'preference': preference, }) self._sort_formats(formats) return formats def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, - entry_protocol='m3u8', preference=None): + entry_protocol='m3u8', preference=None, + m3u8_id=None): formats = [{ - 'format_id': 'm3u8-meta', + 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])), 'url': m3u8_url, 'ext': ext, 'protocol': 'm3u8', @@ -833,9 +850,8 @@ class InfoExtractor(object): formats.append({'url': format_url(line)}) continue tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) - f = { - 'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), + 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])), 'url': format_url(line.strip()), 'tbr': 
tbr, 'ext': ext, diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index b116d251d..1d9166455 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -34,8 +34,6 @@ class GoshgayIE(InfoExtractor): duration = parse_duration(self._html_search_regex( r'<span class="duration">\s*-?\s*(.*?)</span>', webpage, 'duration', fatal=False)) - family_friendly = self._html_search_meta( - 'isFamilyFriendly', webpage, default='false') flashvars = compat_parse_qs(self._html_search_regex( r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"', @@ -49,5 +47,5 @@ class GoshgayIE(InfoExtractor): 'title': title, 'thumbnail': thumbnail, 'duration': duration, - 'age_limit': 0 if family_friendly == 'true' else 18, + 'age_limit': self._family_friendly_search(webpage), } diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py index d16d483ee..99a1361f8 100644 --- a/youtube_dl/extractor/izlesene.py +++ b/youtube_dl/extractor/izlesene.py @@ -80,9 +80,6 @@ class IzleseneIE(InfoExtractor): r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'comment_count', fatal=False) - family_friendly = self._html_search_meta( - 'isFamilyFriendly', webpage, 'age limit', fatal=False) - content_url = self._html_search_meta( 'contentURL', webpage, 'content URL', fatal=False) ext = determine_ext(content_url, 'mp4') @@ -120,6 +117,6 @@ class IzleseneIE(InfoExtractor): 'duration': duration, 'view_count': int_or_none(view_count), 'comment_count': int_or_none(comment_count), - 'age_limit': 18 if family_friendly == 'False' else 0, + 'age_limit': self._family_friendly_search(webpage), 'formats': formats, } diff --git a/youtube_dl/extractor/rtp.py b/youtube_dl/extractor/rtp.py index 7736cabba..ecf4939cd 100644 --- a/youtube_dl/extractor/rtp.py +++ b/youtube_dl/extractor/rtp.py @@ -1,16 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals -import json +import re from .common import InfoExtractor -from ..utils import js_to_json 
class RTPIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?' _TESTS = [{ 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', + 'md5': 'e736ce0c665e459ddb818546220b4ef8', 'info_dict': { 'id': 'e174042', 'ext': 'mp3', @@ -18,9 +18,6 @@ class RTPIE(InfoExtractor): 'description': 'As paixões musicais de António Cartaxo e António Macedo', 'thumbnail': 're:^https?://.*\.jpg', }, - 'params': { - 'skip_download': True, # RTMP download - }, }, { 'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', 'only_matching': True, @@ -37,20 +34,48 @@ class RTPIE(InfoExtractor): player_config = self._search_regex( r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config') - config = json.loads(js_to_json(player_config)) + config = self._parse_json(player_config, video_id) path, ext = config.get('file').rsplit('.', 1) formats = [{ + 'format_id': 'rtmp', + 'ext': ext, + 'vcodec': config.get('type') == 'audio' and 'none' or None, + 'preference': -2, + 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config), 'app': config.get('application'), 'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path), 'page_url': url, - 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config), 'rtmp_live': config.get('live', False), - 'ext': ext, - 'vcodec': config.get('type') == 'audio' and 'none' or None, 'player_url': 'http://programas.rtp.pt/play/player.swf?v3', + 'rtmp_real_time': True, }] + # Construct regular HTTP download URLs + replacements = { + 'audio': { + 'format_id': 'mp3', + 'pattern': r'^nas2\.share/wavrss/', + 'repl': 'http://rsspod.rtp.pt/podcasts/', + 'vcodec': 'none', + }, + 'video': { + 'format_id': 'mp4_h264', + 'pattern': r'^nas2\.share/h264/', + 'repl': 'http://rsspod.rtp.pt/videocasts/', + 'vcodec': 'h264', + }, + } + r = replacements[config['type']] + if re.match(r['pattern'], config['file']) is not None: + formats.append({ + 'format_id': r['format_id'], + 'url': 
re.sub(r['pattern'], r['repl'], config['file']), + 'vcodec': r['vcodec'], + }) + + self._sort_formats(formats) + return { 'id': video_id, 'title': title, diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py index 5e84c1098..d0981115d 100644 --- a/youtube_dl/extractor/rts.py +++ b/youtube_dl/extractor/rts.py @@ -6,12 +6,14 @@ import re from .common import InfoExtractor from ..compat import ( compat_str, + compat_urllib_parse_urlparse, ) from ..utils import ( int_or_none, parse_duration, parse_iso8601, unescapeHTML, + xpath_text, ) @@ -159,11 +161,27 @@ class RTSIE(InfoExtractor): return int_or_none(self._search_regex( r'-([0-9]+)k\.', url, 'bitrate', default=None)) - formats = [{ - 'format_id': fid, - 'url': furl, - 'tbr': extract_bitrate(furl), - } for fid, furl in info['streams'].items()] + formats = [] + for format_id, format_url in info['streams'].items(): + if format_url.endswith('.f4m'): + token = self._download_xml( + 'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path, + video_id, 'Downloading %s token' % format_id) + auth_params = xpath_text(token, './/authparams', 'auth params') + if not auth_params: + continue + formats.extend(self._extract_f4m_formats( + '%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params), + video_id, f4m_id=format_id)) + elif format_url.endswith('.m3u8'): + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id)) + else: + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'tbr': extract_bitrate(format_url), + }) if 'media' in info: formats.extend([{ diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py deleted file mode 100644 index feef33e27..000000000 --- a/youtube_dl/extractor/soulanime.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - HEADRequest, - 
urlhandle_detect_ext, -) - - -class SoulAnimeWatchingIE(InfoExtractor): - IE_NAME = "soulanime:watching" - IE_DESC = "SoulAnime video" - _TEST = { - 'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/', - 'md5': '05fae04abf72298098b528e98abf4298', - 'info_dict': { - 'id': 'seirei-tsukai-no-blade-dance-episode-9', - 'ext': 'mp4', - 'title': 'seirei-tsukai-no-blade-dance-episode-9', - 'description': 'seirei-tsukai-no-blade-dance-episode-9' - } - } - _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - domain = mobj.group('domain') - - page = self._download_webpage(url, video_id) - - video_url_encoded = self._html_search_regex( - r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url') - video_url = "http://www.soul-anime." + domain + video_url_encoded - - ext_req = HEADRequest(video_url) - ext_handle = self._request_webpage( - ext_req, video_id, note='Determining extension') - ext = urlhandle_detect_ext(ext_handle) - - return { - 'id': video_id, - 'url': video_url, - 'ext': ext, - 'title': video_id, - 'description': video_id - } - - -class SoulAnimeSeriesIE(InfoExtractor): - IE_NAME = "soulanime:series" - IE_DESC = "SoulAnime Series" - - _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)' - - _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>' - - _TEST = { - 'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/', - 'info_dict': { - 'id': 'black-rock-shooter-tv' - }, - 'playlist_count': 8 - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - series_id = mobj.group('id') - domain = mobj.group('domain') - - pattern = re.compile(self._EPISODE_REGEX) - - page = self._download_webpage(url, series_id, "Downloading series page") - mobj = pattern.findall(page) - - entries = [self.url_result("http://www.soul-anime." 
+ domain + obj) for obj in mobj] - - return self.playlist_result(entries, series_id) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 18a823719..e85d452a3 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -15,7 +15,8 @@ class TeamcocoIE(InfoExtractor): 'id': '80187', 'ext': 'mp4', 'title': 'Conan Becomes A Mary Kay Beauty Consultant', - 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.' + 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.', + 'age_limit': 0, } }, { 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', @@ -24,7 +25,8 @@ class TeamcocoIE(InfoExtractor): 'id': '19705', 'ext': 'mp4', "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", - "title": "Louis C.K. Interview Pt. 1 11/3/11" + "title": "Louis C.K. Interview Pt. 
1 11/3/11", + 'age_limit': 0, } } ] @@ -83,4 +85,5 @@ class TeamcocoIE(InfoExtractor): 'title': self._og_search_title(webpage), 'thumbnail': self._og_search_thumbnail(webpage), 'description': self._og_search_description(webpage), + 'age_limit': self._family_friendly_search(webpage), } diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index ba65996dc..102362b29 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -1,6 +1,8 @@ # encoding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( float_or_none, @@ -11,7 +13,7 @@ from ..utils import ( class TvigleIE(InfoExtractor): IE_NAME = 'tvigle' IE_DESC = 'Интернет-телевидение Tvigle.ru' - _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$' + _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))' _TESTS = [ { @@ -38,16 +40,22 @@ class TvigleIE(InfoExtractor): 'duration': 186.080, 'age_limit': 0, }, - }, + }, { + 'url': 'https://cloud.tvigle.ru/video/5267604/', + 'only_matching': True, + } ] def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') - video_id = self._html_search_regex( - r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id') + if not video_id: + webpage = self._download_webpage(url, display_id) + video_id = self._html_search_regex( + r'<li class="video-preview current_playing" id="(\d+)">', + webpage, 'video id') video_data = self._download_json( 'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id) diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py index e332d4694..c80ec15cf 100644 --- a/youtube_dl/extractor/tweakers.py +++ b/youtube_dl/extractor/tweakers.py @@ -1,35 
+1,65 @@ -# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import ( + xpath_text, + xpath_with_ns, + int_or_none, + float_or_none, +) class TweakersIE(InfoExtractor): - _VALID_URL = r'https?://tweakers\.net/video/(?P<id>[0-9]+).*' + _VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)' _TEST = { 'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html', - 'md5': 'f7f7f3027166a7f32f024b4ae6571ced', + 'md5': '1b5afa817403bb5baa08359dca31e6df', 'info_dict': { 'id': '9926', 'ext': 'mp4', - 'title': 'New-Nintendo-3Ds-Xl-Op-Alle-Fronten-Beter', + 'title': 'New Nintendo 3DS XL - Op alle fronten beter', + 'description': 'md5:f97324cc71e86e11c853f0763820e3ba', + 'thumbnail': 're:^https?://.*\.jpe?g$', + 'duration': 386, } } def _real_extract(self, url): - splitted_url = re.split('.html|/', url) - del splitted_url[-1] # To remove extra '/' at the end video_id = self._match_id(url) - title = splitted_url[5].title() # Retrieve title for URL and capitalize - splitted_url[3] = splitted_url[3] + '/player' # Add /player to get the player page - player_url = '/'.join(splitted_url) + '.html' - player_page = self._download_webpage(player_url, video_id) + + playlist = self._download_xml( + 'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id, + video_id) + + NS_MAP = { + 'xspf': 'http://xspf.org/ns/0/', + 's1': 'http://static.streamone.nl/player/ns/0', + } + + track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)) + + title = xpath_text( + track, xpath_with_ns('./xspf:title', NS_MAP), 'title') + description = xpath_text( + track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description') + thumbnail = xpath_text( + track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail') + duration = float_or_none( + xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), + 1000) + + formats = [{ + 'url': location.text, + 'format_id': 
location.get(xpath_with_ns('s1:label', NS_MAP)), + 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), + 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), + } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] return { 'id': video_id, - 'ext': 'mp4', 'title': title, - 'url': re.findall('http.*mp4', player_page)[0], + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, } |