aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2017-01-01 21:25:25 +0700
committerSergey M․ <dstftw@gmail.com>2017-01-01 21:26:32 +0700
commit966815e1390386948957ec5f269f470e87b85b2b (patch)
tree9e89739d62dc0cf6a73229a8753ed51d7949ca29
parente5e19379be8d2d721178e9b54780cdcce369939c (diff)
[nrktv:episodes] Add support for episodes (#11571)
-rw-r--r--youtube_dl/extractor/extractors.py1
-rw-r--r--youtube_dl/extractor/nrk.py66
2 files changed, 51 insertions, 16 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 3b0bff0d7..3017bf56c 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -655,6 +655,7 @@ from .nrk import (
NRKSkoleIE,
NRKTVIE,
NRKTVDirekteIE,
+ NRKTVEpisodesIE,
)
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 776c40b94..ea7be005a 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -207,7 +207,15 @@ class NRKIE(NRKBaseIE):
class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio'
- _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
+ _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:tv|radio)\.nrk(?:super)?\.no/
+ (?:serie/[^/]+|program)/
+ (?![Ee]pisodes)%s
+ (?:/\d{2}-\d{2}-\d{4})?
+ (?:\#del=(?P<part_id>\d+))?
+ ''' % _EPISODE_RE
_API_HOST = 'psapi-we.nrk.no'
_TESTS = [{
@@ -286,9 +294,30 @@ class NRKTVDirekteIE(NRKTVIE):
}]
-class NRKPlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+class NRKPlaylistBaseIE(InfoExtractor):
+ def _extract_description(self, webpage):
+ pass
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
+ for video_id in re.findall(self._ITEM_RE, webpage)
+ ]
+
+ playlist_title = self. _extract_title(webpage)
+ playlist_description = self._extract_description(webpage)
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+class NRKPlaylistIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+ _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
_TESTS = [{
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
'info_dict': {
@@ -307,23 +336,28 @@ class NRKPlaylistIE(InfoExtractor):
'playlist_count': 5,
}]
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
+ def _extract_title(self, webpage):
+ return self._og_search_title(webpage, fatal=False)
- webpage = self._download_webpage(url, playlist_id)
+ def _extract_description(self, webpage):
+ return self._og_search_description(webpage)
- entries = [
- self.url_result('nrk:%s' % video_id, 'NRK')
- for video_id in re.findall(
- r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
- webpage)
- ]
- playlist_title = self._og_search_title(webpage)
- playlist_description = self._og_search_description(webpage)
+class NRKTVEpisodesIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
+ _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
+ 'info_dict': {
+ 'id': '69031',
+ 'title': 'Nytt på nytt, sesong: 201210',
+ },
+ 'playlist_count': 4,
+ }]
- return self.playlist_result(
- entries, playlist_id, playlist_title, playlist_description)
+ def _extract_title(self, webpage):
+ return self._html_search_regex(
+ r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
class NRKSkoleIE(InfoExtractor):