diff options
-rw-r--r-- | youtube_dl/extractor/npo.py | 63 |
1 files changed, 26 insertions, 37 deletions
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 3293bdb17..f95867d58 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -438,9 +438,29 @@ class SchoolTVIE(InfoExtractor): } -class VPROIE(NPOIE): +class NPOPlaylistBaseIE(NPOIE): + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) + for video_id in re.findall(self._PLAYLIST_ENTRY_RE, webpage) + ] + + playlist_title = self._html_search_regex( + self._PLAYLIST_TITLE_RE, webpage, 'playlist title', + default=None) or self._og_search_title(webpage) + + return self.playlist_result(entries, playlist_id, playlist_title) + + +class VPROIE(NPOPlaylistBaseIE): IE_NAME = 'vpro' _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html' + _PLAYLIST_TITLE_RE = r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>' + _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"' _TESTS = [ { @@ -473,48 +493,17 @@ class VPROIE(NPOIE): } ] - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - entries = [ - self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) - for video_id in re.findall(r'data-media-id="([^"]+)"', webpage) - ] - - playlist_title = self._search_regex( - r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>', - webpage, 'playlist title', default=None) or self._og_search_title(webpage) - - return self.playlist_result(entries, playlist_id, playlist_title) - -class WNLIE(InfoExtractor): +class WNLIE(NPOPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+' + _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>' + _PLAYLIST_ENTRY_RE = r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+' - _TEST = { + _TESTS = [{ 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515', 'info_dict': { 'id': 'vandaag-de-dag-6-mei', 'title': 'Vandaag de Dag 6 mei', }, 'playlist_count': 4, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - entries = [ - self.url_result('npo:%s' % video_id, 'NPO') - for video_id, part in re.findall( - r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage) - ] - - playlist_title = self._html_search_regex( - r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>', - webpage, 'playlist title') - - return self.playlist_result(entries, playlist_id, playlist_title) + }] |