diff options
author | Sergey M․ <dstftw@gmail.com> | 2018-05-02 07:21:24 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2018-05-02 07:21:24 +0700 |
commit | ea1f5e5dbd6c58d4f0872a65b97611732f4b29bd (patch) | |
tree | 68b83f271749afec55332f097ae40c0f6224f259 /youtube_dl/extractor/itv.py | |
parent | 5f95927a62a533b9e616abb5f1481cedeaa16a4a (diff) |
[itv:btcc] Add extractor (closes #16139)
Diffstat (limited to 'youtube_dl/extractor/itv.py')
-rw-r--r-- | youtube_dl/extractor/itv.py | 37 |
1 files changed, 37 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index 457b424a2..6a4f8a505 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -7,6 +7,7 @@ import json
 import re
 
 from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
 from ..compat import (
     compat_str,
     compat_etree_register_namespace,
@@ -18,6 +19,7 @@ from ..utils import (
     xpath_text,
     int_or_none,
     parse_duration,
+    smuggle_url,
     ExtractorError,
     determine_ext,
 )
@@ -260,3 +262,38 @@ class ITVIE(InfoExtractor):
             'subtitles': subtitles,
         })
         return info
+
+
+class ITVBTCCIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
+        'info_dict': {
+            'id': 'btcc-2018-all-the-action-from-brands-hatch',
+            'title': 'BTCC 2018: All the action from Brands Hatch',
+        },
+        'playlist_mincount': 9,
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result(
+                smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
+                    # ITV does not like some GB IP ranges, so here are some
+                    # IP blocks it accepts
+                    'geo_ip_blocks': [
+                        '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
+                    ],
+                    'referrer': url,
+                }),
+                ie=BrightcoveNewIE.ie_key(), video_id=video_id)
+            for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
+
+        title = self._og_search_title(webpage, fatal=False)
+
+        return self.playlist_result(entries, playlist_id, title)
```