diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-02-07 21:11:59 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-02-07 21:11:59 +0600 | 
| commit | 5a7699bb2ef77c2548bcc2112e042481a809ba35 (patch) | |
| tree | 3bafd7a22b4cbe2f433442f159604ab2acbf3d01 /youtube_dl/extractor/konserthusetplay.py | |
| parent | 8628d26f3888d2f0541a5d0e5d7c35fa5cac3fc0 (diff) | |
[konserthusetplay] Improve and extract all formats (Closes #8381)
Diffstat (limited to 'youtube_dl/extractor/konserthusetplay.py')
| -rw-r--r-- | youtube_dl/extractor/konserthusetplay.py | 111 | 
1 files changed, 87 insertions, 24 deletions
```diff
diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py
index 8200942d3..f21d050d7 100644
--- a/youtube_dl/extractor/konserthusetplay.py
+++ b/youtube_dl/extractor/konserthusetplay.py
@@ -2,44 +2,107 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+)
 
 
 class KonserthusetPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?m=(?P<id>[0-9A-Za-z_-]+)'
-
-    _TESTS = [{
+    _VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?m=(?P<id>[^&]+)'
+    _TEST = {
         'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
-        'md5': 'e272a765e0d12a0226199e5f32d76116',
         'info_dict': {
             'id': 'CKDDnlCY-dhWAAqiMERd-A',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Orkesterns instrument: Valthornen',
             'description': 'md5:f10e1f0030202020396a4d712d2fa827',
-            'thumbnail': 'http://csp.picsearch.com/img/C/K/D/D/title_CKDDnlCY-dhWAAqiMERd-A'
-        }
-    }, {
-        'url': 'http://www.konserthusetplay.se/?m=IyQcMOEpmKqT91SVT5OP8Q',
-        'md5': 'c4adb8ca76fdd33d4cbdcc7c3d181f22',
-        'info_dict': {
-            'id': 'IyQcMOEpmKqT91SVT5OP8Q',
-            'ext': 'mp4',
-            'title': 'Eliasson Einsame Fahrt, violinkonsert',
-            'description': 'md5:a8dcc8dfd9473d52433b2c5f588ba191',
-            'thumbnail': 'http://csp.picsearch.com/img/I/y/Q/c/title_IyQcMOEpmKqT91SVT5OP8Q'
-        }
-    }]
+            'thumbnail': 're:^https?://.*$',
+            'duration': 398.8,
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
-        description = self._og_search_description(webpage)
-        title = self._og_search_title(webpage)
-        main_video = self._html_search_regex(r'<link rel="video_src" href="(.+?)" />', webpage, 'url')
-        thumbnail = self._og_search_thumbnail(webpage)
+
+        e = self._search_regex(
+            r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')
+
+        rest = self._download_json(
+            'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
+            video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
+
+        media = rest['media']
+        player_config = media['playerconfig']
+        playlist = player_config['playlist']
+
+        source = next(f for f in playlist if f.get('bitrates'))
+
+        connection_url = (player_config.get('rtmp', {}).get(
+            'netConnectionUrl') or player_config.get(
+            'plugins', {}).get('bwcheck', {}).get('netConnectionUrl'))
+
+        FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
+
+        formats = []
+
+        fallback_url = source.get('fallbackUrl')
+        fallback_format_id = None
+        if fallback_url:
+            fallback_format_id = self._search_regex(
+                FORMAT_ID_REGEX, fallback_url, 'format id', default=None)
+
+        if connection_url:
+            for f in source['bitrates']:
+                video_url = f.get('url')
+                if not video_url:
+                    continue
+                format_id = self._search_regex(
+                    FORMAT_ID_REGEX, video_url, 'format id', default=None)
+                f_common = {
+                    'vbr': int_or_none(f.get('bitrate')),
+                    'width': int_or_none(f.get('width')),
+                    'height': int_or_none(f.get('height')),
+                }
+                f = f_common.copy()
+                f.update({
+                    'url': connection_url,
+                    'play_path': video_url,
+                    'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
+                    'ext': 'flv',
+                })
+                formats.append(f)
+                if format_id and format_id == fallback_format_id:
+                    f = f_common.copy()
+                    f.update({
+                        'url': fallback_url,
+                        'format_id': 'http-%s' % format_id if format_id else 'http',
+                    })
+                    formats.append(f)
+
+        if not formats and fallback_url:
+            formats.append({
+                'url': fallback_url,
+            })
+
+        self._sort_formats(formats)
+
+        title = player_config.get('title') or media['title']
+        description = player_config.get('mediaInfo', {}).get('description')
+        thumbnail = media.get('image')
+        duration = float_or_none(media.get('duration'), 1000)
+
         return {
             'id': video_id,
             'title': title,
             'description': description,
-            'url': main_video,
-            'thumbnail': thumbnail
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
         }
```
