diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-03-21 18:22:08 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-03-21 18:22:08 +0600 |
commit | faa1b5c2920f1d50f01b83e96b5b0f136edc74f7 (patch) | |
tree | 83fc49c8fc0f8651691a37219f0879b353362b20 | |
parent | 393d9fc6d28704ef7b60acb1dfc785493ecbe0eb (diff) |
[nrk:playlist] Add extractor (Closes #5245)
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/nrk.py | 31 |
2 files changed, 32 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7eb9b4fbb..e25e4a582 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -346,6 +346,7 @@ from .npo import ( ) from .nrk import ( NRKIE, + NRKPlaylistIE, NRKTVIE, ) from .ntvde import NTVDeIE diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 117a78aa1..c2a8202dd 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -76,6 +76,37 @@ class NRKIE(InfoExtractor): } +class NRKPlaylistIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:[^/]+/)*(?P<id>[^/]+)' + + _TEST = { + 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763', + 'info_dict': { + 'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763', + 'title': 'Gjenopplev den historiske solformørkelsen', + 'description': 'md5:c2df8ea3bac5654a26fc2834a542feed', + }, + 'playlist_mincount': 2, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('nrk:%s' % video_id, 'NRK') + for video_id in re.findall( + r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="(\d+)"', webpage) + ] + + playlist_title = self._og_search_title(webpage) + playlist_description = self._og_search_description(webpage) + + return self.playlist_result( + entries, playlist_id, playlist_title, playlist_description) + + class NRKTVIE(InfoExtractor): _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' |