diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-04-16 16:13:14 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-04-16 16:13:14 +0800 | 
| commit | 260c7036bab996a6364ed8578819fdfa034d6bd1 (patch) | |
| tree | 5fa1fe29dfffb1b5f36cb212c517052f696f6b80 | |
| parent | f74197a0746ac24fc6003166fbc7b36362ce8ba8 (diff) | |
[sportbox] Fix SportBoxEmbedIE
Also fixes test_Generic_29 (http://www.vestifinance.ru/articles/25753)
| -rw-r--r-- | youtube_dl/extractor/sportbox.py | 34 | 
1 file changed, 24 insertions, 10 deletions
```diff
diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py
index 4f0c66213..e5c28ae89 100644
--- a/youtube_dl/extractor/sportbox.py
+++ b/youtube_dl/extractor/sportbox.py
@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
+    js_to_json,
     unified_strdate,
 )
 
@@ -94,19 +95,32 @@ class SportBoxEmbedIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        hls = self._search_regex(
-            r"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]([^'\"]+)['\"]",
-            webpage, 'hls file')
+        formats = []
 
-        formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
-        self._sort_formats(formats)
+        def cleanup_js(code):
+            # desktop_advert_config contains complex Javascripts and we don't need it
+            return js_to_json(re.sub(r'desktop_advert_config.*', '', code))
+
+        jwplayer_data = self._parse_json(self._search_regex(
+            r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
+            transform_source=cleanup_js)
+
+        hls_url = jwplayer_data.get('hls_url')
+        if hls_url:
+            formats.extend(self._extract_m3u8_formats(
+                hls_url, video_id, ext='mp4', m3u8_id='hls'))
 
-        title = self._search_regex(
-            r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title')
+        rtsp_url = jwplayer_data.get('rtsp_url')
+        if rtsp_url:
+            formats.append({
+                'url': rtsp_url,
+                'format_id': 'rtsp',
+            })
+
+        self._sort_formats(formats)
 
-        thumbnail = self._search_regex(
-            r'sportboxPlayer\.jwplayer_common_params\.image\s*=\s*"([^"]+)"',
-            webpage, 'thumbnail', default=None)
+        title = jwplayer_data['node_title']
+        thumbnail = jwplayer_data.get('image_url')
 
         return {
             'id': video_id,
```
