diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-02-07 21:11:59 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-02-07 21:11:59 +0600 |
commit | 5a7699bb2ef77c2548bcc2112e042481a809ba35 (patch) | |
tree | 3bafd7a22b4cbe2f433442f159604ab2acbf3d01 | |
parent | 8628d26f3888d2f0541a5d0e5d7c35fa5cac3fc0 (diff) |
[konserthusetplay] Improve and extract all formats (Closes #8381)
-rw-r--r-- | youtube_dl/extractor/konserthusetplay.py | 111 |
1 files changed, 87 insertions, 24 deletions
```diff
diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py
index 8200942d3..f21d050d7 100644
--- a/youtube_dl/extractor/konserthusetplay.py
+++ b/youtube_dl/extractor/konserthusetplay.py
@@ -2,44 +2,107 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+)
 
 
 class KonserthusetPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?m=(?P<id>[0-9A-Za-z_-]+)'
-
-    _TESTS = [{
+    _VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?m=(?P<id>[^&]+)'
+    _TEST = {
         'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
-        'md5': 'e272a765e0d12a0226199e5f32d76116',
         'info_dict': {
             'id': 'CKDDnlCY-dhWAAqiMERd-A',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Orkesterns instrument: Valthornen',
             'description': 'md5:f10e1f0030202020396a4d712d2fa827',
-            'thumbnail': 'http://csp.picsearch.com/img/C/K/D/D/title_CKDDnlCY-dhWAAqiMERd-A'
-        }
-    }, {
-        'url': 'http://www.konserthusetplay.se/?m=IyQcMOEpmKqT91SVT5OP8Q',
-        'md5': 'c4adb8ca76fdd33d4cbdcc7c3d181f22',
-        'info_dict': {
-            'id': 'IyQcMOEpmKqT91SVT5OP8Q',
-            'ext': 'mp4',
-            'title': 'Eliasson Einsame Fahrt, violinkonsert',
-            'description': 'md5:a8dcc8dfd9473d52433b2c5f588ba191',
-            'thumbnail': 'http://csp.picsearch.com/img/I/y/Q/c/title_IyQcMOEpmKqT91SVT5OP8Q'
-        }
-    }]
+            'thumbnail': 're:^https?://.*$',
+            'duration': 398.8,
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
-        description = self._og_search_description(webpage)
-        title = self._og_search_title(webpage)
-        main_video = self._html_search_regex(r'<link rel="video_src" href="(.+?)" />', webpage, 'url')
-        thumbnail = self._og_search_thumbnail(webpage)
+
+        e = self._search_regex(
+            r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')
+
+        rest = self._download_json(
+            'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
+            video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
+
+        media = rest['media']
+        player_config = media['playerconfig']
+        playlist = player_config['playlist']
+
+        source = next(f for f in playlist if f.get('bitrates'))
+
+        connection_url = (player_config.get('rtmp', {}).get(
+            'netConnectionUrl') or player_config.get(
+            'plugins', {}).get('bwcheck', {}).get('netConnectionUrl'))
+
+        FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
+
+        formats = []
+
+        fallback_url = source.get('fallbackUrl')
+        fallback_format_id = None
+        if fallback_url:
+            fallback_format_id = self._search_regex(
+                FORMAT_ID_REGEX, fallback_url, 'format id', default=None)
+
+        if connection_url:
+            for f in source['bitrates']:
+                video_url = f.get('url')
+                if not video_url:
+                    continue
+                format_id = self._search_regex(
+                    FORMAT_ID_REGEX, video_url, 'format id', default=None)
+                f_common = {
+                    'vbr': int_or_none(f.get('bitrate')),
+                    'width': int_or_none(f.get('width')),
+                    'height': int_or_none(f.get('height')),
+                }
+                f = f_common.copy()
+                f.update({
+                    'url': connection_url,
+                    'play_path': video_url,
+                    'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
+                    'ext': 'flv',
+                })
+                formats.append(f)
+                if format_id and format_id == fallback_format_id:
+                    f = f_common.copy()
+                    f.update({
+                        'url': fallback_url,
+                        'format_id': 'http-%s' % format_id if format_id else 'http',
+                    })
+                    formats.append(f)
+
+        if not formats and fallback_url:
+            formats.append({
+                'url': fallback_url,
+            })
+
+        self._sort_formats(formats)
+
+        title = player_config.get('title') or media['title']
+        description = player_config.get('mediaInfo', {}).get('description')
+        thumbnail = media.get('image')
+        duration = float_or_none(media.get('duration'), 1000)
+
         return {
             'id': video_id,
             'title': title,
             'description': description,
-            'url': main_video,
-            'thumbnail': thumbnail
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
         }
```