diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-03-11 23:16:51 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-03-11 23:16:51 +0700 |
commit | 70497994707b6bcbd6c0ecc3fb1073be4d75d970 (patch) | |
tree | d65c700fe99206cf72711020a25b596326de3237 /youtube_dl/extractor/discoverygo.py | |
parent | 4605c94d1a386a71f170dc46d491c4ef78828753 (diff) |
[discoverygo:playlist] Add extractor (closes #12424)
Diffstat (limited to 'youtube_dl/extractor/discoverygo.py')
-rw-r--r-- | youtube_dl/extractor/discoverygo.py | 59 |
1 files changed, 55 insertions, 4 deletions
diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index 2042493a8..d14de8f00 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -1,17 +1,21 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( extract_attributes, + ExtractorError, int_or_none, parse_age_limit, - ExtractorError, + remove_end, + unescapeHTML, ) -class DiscoveryGoIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)?(?: +class DiscoveryGoBaseIE(InfoExtractor): + _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?: discovery| investigationdiscovery| discoverylife| @@ -21,7 +25,11 @@ class DiscoveryGoIE(InfoExtractor): sciencechannel| tlc| velocitychannel - )go\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)''' + )go\.com/%s(?P<id>[^/?#&]+)''' + + +class DiscoveryGoIE(DiscoveryGoBaseIE): + _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' _TEST = { 'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/', 'info_dict': { @@ -113,3 +121,46 @@ class DiscoveryGoIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): + _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % '' + _TEST = { + 'url': 'https://www.discoverygo.com/bering-sea-gold/', + 'info_dict': { + 'id': 'bering-sea-gold', + 'title': 'Bering Sea Gold', + 'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e', + }, + 'playlist_mincount': 6, + } + + @classmethod + def suitable(cls, url): + return False if DiscoveryGoIE.suitable(url) else super( + DiscoveryGoPlaylistIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + entries = [] + for mobj in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage): + data = self._parse_json( + mobj.group('json'), display_id, + transform_source=unescapeHTML, fatal=False) + if not isinstance(data, dict) or data.get('type') != 'episode': + continue + episode_url = data.get('socialUrl') + if not episode_url: + continue + entries.append(self.url_result( + episode_url, ie=DiscoveryGoIE.ie_key(), + video_id=data.get('id'))) + + return self.playlist_result( + entries, display_id, + remove_end(self._og_search_title( + webpage, fatal=False), ' | Discovery GO'), + self._og_search_description(webpage)) |