diff options
| author | Sergey M․ <dstftw@gmail.com> | 2015-05-15 23:19:21 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-05-15 23:19:21 +0600 | 
| commit | 9123d645923741056d1e1f2be28974e992eea950 (patch) | |
| tree | d014a6580d7f71fc4bac6c0817edff9e72ed2d8d | |
| parent | eeb23eb7ea6953d7e90ccf669cd0e636d10b2b91 (diff) | |
| parent | b827a6015c145d67a4d4e9ea38aa54ebe347d3fe (diff) | |
Merge branch 'maddoger-sportbox-fix'
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 37 | ||||
| -rw-r--r-- | youtube_dl/extractor/sportbox.py | 125 | 
3 files changed, 127 insertions, 40 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8ec0c1032..f293bc2a4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -502,7 +502,10 @@ from .spiegel import SpiegelIE, SpiegelArticleIE  from .spiegeltv import SpiegeltvIE  from .spike import SpikeIE  from .sport5 import Sport5IE -from .sportbox import SportBoxIE +from .sportbox import ( +    SportBoxIE, +    SportBoxEmbedIE, +)  from .sportdeutschland import SportDeutschlandIE  from .srf import SrfIE  from .srmediathek import SRMediathekIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3d756e848..610e33091 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -32,6 +32,7 @@ from .brightcove import BrightcoveIE  from .nbc import NBCSportsVPlayerIE  from .ooyala import OoyalaIE  from .rutv import RUTVIE +from .sportbox import SportBoxEmbedIE  from .smotri import SmotriIE  from .condenast import CondeNastIE  from .udn import UDNEmbedIE @@ -224,6 +225,37 @@ class GenericIE(InfoExtractor):                  'skip_download': True,              },          }, +        # SportBox embed +        { +            'url': 'http://www.vestifinance.ru/articles/25753', +            'info_dict': { +                'id': '25753', +                'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"', +            }, +            'playlist': [{ +                'info_dict': { +                    'id': '370908', +                    'title': 'Госзаказ. День 3', +                    'ext': 'mp4', +                } +            }, { +                'info_dict': { +                    'id': '370905', +                    'title': 'Госзаказ. День 2', +                    'ext': 'mp4', +                } +            }, { +                'info_dict': { +                    'id': '370902', +                    'title': 'Госзаказ. День 1', +                    'ext': 'mp4', +                } +            }], +            'params': { +                # m3u8 download +                'skip_download': True, +            }, +        },          # Embedded TED video          {              'url': 'http://en.support.wordpress.com/videos/ted-talks/', @@ -1229,6 +1261,11 @@ class GenericIE(InfoExtractor):          if rutv_url:              return self.url_result(rutv_url, 'RUTV') +        # Look for embedded SportBox player +        sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) +        if sportbox_urls: +            return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') +          # Look for embedded TED player          mobj = re.search(              r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index becdf658f..8686f9d11 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import compat_urlparse  from ..utils import (      parse_duration,      parse_iso8601, @@ -11,30 +12,31 @@ from ..utils import (  class SportBoxIE(InfoExtractor): -    _VALID_URL = r'https?://news\.sportbox\.ru/Vidy_sporta/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)' -    _TESTS = [ -        { -            'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', -            'md5': 'ff56a598c2cf411a9a38a69709e97079', -            'info_dict': { -                'id': '80822', -                'ext': 'mp4', -                'title': 'Гонка 2  заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', -                'description': 'md5:81715fa9c4ea3d9e7915dc8180c778ed', -                'thumbnail': 're:^https?://.*\.jpg$', -                'timestamp': 1411896237, -                'upload_date': '20140928', -                'duration': 4846, -            }, -            'params': { -                # m3u8 download -                'skip_download': True, -            }, -        }, { -            'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4', -            'only_matching': True, -        } -    ] +    _VALID_URL = r'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)' +    _TESTS = [{ +        'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', +        'md5': 'ff56a598c2cf411a9a38a69709e97079', +        'info_dict': { +            'id': '80822', +            'ext': 'mp4', +            'title': 'Гонка 2  заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', +            'description': 'md5:81715fa9c4ea3d9e7915dc8180c778ed', +            'thumbnail': 're:^https?://.*\.jpg$', +            'timestamp': 1411896237, +            'upload_date': '20140928', +            'duration': 4846, +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +    }, { +        'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4', +        'only_matching': True, +    }, { +        'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355', +        'only_matching': True, +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) @@ -42,35 +44,80 @@ class SportBoxIE(InfoExtractor):          webpage = self._download_webpage(url, display_id) -        video_id = self._search_regex( -            r'src="/vdl/player/media/(\d+)"', webpage, 'video id') - -        player = self._download_webpage( -            'http://news.sportbox.ru/vdl/player/media/%s' % video_id, -            display_id, 'Downloading player webpage') - -        hls = self._search_regex( -            r"var\s+original_hls_file\s*=\s*'([^']+)'", player, 'hls file') - -        formats = self._extract_m3u8_formats(hls, display_id, 'mp4') +        player = self._search_regex( +            r'src="/?(vdl/player/[^"]+)"', webpage, 'player')          title = self._html_search_regex(              r'<h1 itemprop="name">([^<]+)</h1>', webpage, 'title')          description = self._html_search_regex( -            r'(?s)<div itemprop="description">(.+?)</div>', webpage, 'description', fatal=False) +            r'(?s)<div itemprop="description">(.+?)</div>', +            webpage, 'description', fatal=False)          thumbnail = self._og_search_thumbnail(webpage)          timestamp = parse_iso8601(self._search_regex( -            r'<span itemprop="uploadDate">([^<]+)</span>', webpage, 'timestamp', fatal=False)) +            r'<span itemprop="uploadDate">([^<]+)</span>', +            webpage, 'timestamp', fatal=False))          duration = parse_duration(self._html_search_regex( -            r'<meta itemprop="duration" content="PT([^"]+)">', webpage, 'duration', fatal=False)) +            r'<meta itemprop="duration" content="PT([^"]+)">', +            webpage, 'duration', fatal=False))          return { -            'id': video_id, +            '_type': 'url_transparent', +            'url': compat_urlparse.urljoin(url, '/%s' % player),              'display_id': display_id,              'title': title,              'description': description,              'thumbnail': thumbnail,              'timestamp': timestamp,              'duration': duration, +        } + + +class SportBoxEmbedIE(InfoExtractor): +    _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' +    _TESTS = [{ +        'url': 'http://news.sportbox.ru/vdl/player/ci/211355', +        'info_dict': { +            'id': '211355', +            'ext': 'mp4', +            'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', +            'thumbnail': 're:^https?://.*\.jpg$', +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +    }, { +        'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580', +        'only_matching': True, +    }] + +    @staticmethod +    def _extract_urls(webpage): +        return re.findall( +            r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"', +            webpage) + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) + +        hls = self._search_regex( +            r"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]([^'\"]+)['\"]", +            webpage, 'hls file') + +        formats = self._extract_m3u8_formats(hls, video_id, 'mp4') + +        title = self._search_regex( +            r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title') + +        thumbnail = self._search_regex( +            r'sportboxPlayer\.jwplayer_common_params\.image\s*=\s*"([^"]+)"', +            webpage, 'thumbnail', default=None) + +        return { +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail,              'formats': formats,          }  | 
