diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-04-16 16:13:14 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-04-16 16:13:14 +0800 |
commit | 260c7036bab996a6364ed8578819fdfa034d6bd1 (patch) | |
tree | 5fa1fe29dfffb1b5f36cb212c517052f696f6b80 /youtube_dl/extractor/sportbox.py | |
parent | f74197a0746ac24fc6003166fbc7b36362ce8ba8 (diff) |
[sportbox] Fix SportBoxEmbedIE
Also fixes test_Generic_29 (http://www.vestifinance.ru/articles/25753)
Diffstat (limited to 'youtube_dl/extractor/sportbox.py')
-rw-r--r-- | youtube_dl/extractor/sportbox.py | 34 |
1 file changed, 24 insertions, 10 deletions
```diff
diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py
index 4f0c66213..e5c28ae89 100644
--- a/youtube_dl/extractor/sportbox.py
+++ b/youtube_dl/extractor/sportbox.py
@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
+    js_to_json,
     unified_strdate,
 )
 
@@ -94,19 +95,32 @@ class SportBoxEmbedIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        hls = self._search_regex(
-            r"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]([^'\"]+)['\"]",
-            webpage, 'hls file')
+        formats = []
 
-        formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
-        self._sort_formats(formats)
+        def cleanup_js(code):
+            # desktop_advert_config contains complex Javascripts and we don't need it
+            return js_to_json(re.sub(r'desktop_advert_config.*', '', code))
+
+        jwplayer_data = self._parse_json(self._search_regex(
+            r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
+            transform_source=cleanup_js)
+
+        hls_url = jwplayer_data.get('hls_url')
+        if hls_url:
+            formats.extend(self._extract_m3u8_formats(
+                hls_url, video_id, ext='mp4', m3u8_id='hls'))
 
-        title = self._search_regex(
-            r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title')
+        rtsp_url = jwplayer_data.get('rtsp_url')
+        if rtsp_url:
+            formats.append({
+                'url': rtsp_url,
+                'format_id': 'rtsp',
+            })
+
+        self._sort_formats(formats)
 
-        thumbnail = self._search_regex(
-            r'sportboxPlayer\.jwplayer_common_params\.image\s*=\s*"([^"]+)"',
-            webpage, 'thumbnail', default=None)
+        title = jwplayer_data['node_title']
+        thumbnail = jwplayer_data.get('image_url')
 
         return {
             'id': video_id,
```