diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-07-17 00:14:38 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-07-17 00:14:38 +0600 |
commit | 03f32a7eadf9d832aef55673edf38023a8daff95 (patch) | |
tree | 9195a0af3aad5a5c171227d7df46c67c359942e0 /youtube_dl/extractor | |
parent | 50ea2bb20d3a3e219910e87b8b30fc79ce534595 (diff) |
[wnl] Add extractor for omroepwnl playlists
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/npo.py | 45 |
2 files changed, 40 insertions, 6 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3f4f23521..1d55275dc 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -390,6 +390,7 @@ from .npo import ( NPORadioIE, NPORadioFragmentIE, TegenlichtVproIE, + WNLIE ) from .nrk import ( NRKIE, diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index cf6a388e5..c6bf7619d 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -46,12 +46,15 @@ class NPOIE(NPOBaseIE): IE_NAME = 'npo' IE_DESC = 'npo.nl and ntr.nl' _VALID_URL = r'''(?x) - https?:// - (?:www\.)? - (?: - npo\.nl/(?!live|radio)(?:[^/]+/){2}| - ntr\.nl/(?:[^/]+/){2,}| - omroepwnl\.nl/video/fragment/[^/]+__ + (?: + npo:| + https?:// + (?:www\.)? + (?: + npo\.nl/(?!live|radio)(?:[^/]+/){2}| + ntr\.nl/(?:[^/]+/){2,}| + omroepwnl\.nl/video/fragment/[^/]+__ + ) ) (?P<id>[^/?#]+) ''' @@ -426,3 +429,33 @@ class TegenlichtVproIE(NPOIE): info_page = self._download_json( 'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name) return self._get_info(info_page['mid']) + + +class WNLIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+' + + _TEST = { + 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515', + 'info_dict': { + 'id': 'vandaag-de-dag-6-mei', + 'title': 'Vandaag de Dag 6 mei', + }, + 'playlist_count': 4, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('npo:%s' % video_id, 'NPO') + for video_id, part in re.findall( + r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage) + ] + + playlist_title = self._html_search_regex( + r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>', + webpage, 'playlist title') + + return self.playlist_result(entries, playlist_id, playlist_title) |