diff options
| author | Odd Stråbø <oddstr13@openshell.no> | 2017-01-14 02:36:04 +0100 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-01-31 23:10:40 +0700 | 
| commit | 8fd65faece98139def3a6538e98053bebd400263 (patch) | |
| tree | 7c4f4837aad5b276d0a7d09c368125def5a2054e | |
| parent | d7e215b42dcaf71298a7e1dc953cf93523b3da81 (diff) | |
[NRKTV] Added NRKTVSeriesIE
[NRKTV] Added season and episode number to metadata.
[NRKTV] Added category to metadata.
[NRKTV] Added tests to NRKTVSeries.
[NRKTV] Fixed whitespace issues (flake8).
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/nrk.py | 49 | 
2 files changed, 50 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2590b5e1b..06e6d4620 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -668,6 +668,7 @@ from .nrk import (      NRKTVIE,      NRKTVDirekteIE,      NRKTVEpisodesIE, +    NRKTVSeriesIE,  )  from .ntvde import NTVDeIE  from .ntvru import NTVRuIE diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index ea7be005a..26604f84f 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -128,6 +128,18 @@ class NRKBaseIE(InfoExtractor):          series = conviva.get('seriesName') or data.get('seriesTitle')          episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') +        season_number = None +        episode_number = None +        if data.get('mediaElementType') == 'Episode': +            _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \ +                data.get('relativeOriginUrl', '') +            EPISODENUM_RE = [ +                r'/s(?P<season>\d+)e(?P<episode>\d+)\.', +                r'/sesong-(?P<season>\d+)/episode-(?P<episode>\d+)', +            ] +            season_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='season')) +            episode_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='episode')) +          thumbnails = None          images = data.get('images')          if images and isinstance(images, dict): @@ -140,11 +152,15 @@ class NRKBaseIE(InfoExtractor):                  } for image in web_images if image.get('imageUrl')]          description = data.get('description') +        category = data.get('mediaAnalytics', {}).get('category')          common_info = {              'description': description,              'series': series,              'episode': episode, +            'season_number': season_number, +            'episode_number': episode_number, +            'categories': [category] if category else None,              'age_limit': parse_age_limit(data.get('legalAge')),              'thumbnails': thumbnails,          } @@ -360,6 +376,39 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE):              r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False) +class NRKTVSeriesIE(InfoExtractor): +    _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+)/?' +    _ITEM_RE = r'data-season=["\'](?P<id>\d+)["\']' +    _TESTS = [{ +        'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene', +        'playlist_count': 1, +    }, { +        'url': 'https://tv.nrk.no/serie/saving-the-human-race', +        'playlist_count': 1, +    }, { +        'url': 'https://tv.nrk.no/serie/postmann-pat', +        'playlist_count': 3, +    }, { +        'url': 'https://tv.nrk.no/serie/groenn-glede', +        'playlist_count': 9, +    }] + +    def _real_extract(self, url): +        series_id = self._match_id(url) + +        webpage = self._download_webpage(url, series_id) + +        entries = [ +            self.url_result('https://tv.nrk.no/program/Episodes/{series}/{season}'.format( +                series=series_id, +                season=season_id +            )) +            for season_id in re.findall(self._ITEM_RE, webpage) +        ] + +        return self.playlist_result(entries) + +  class NRKSkoleIE(InfoExtractor):      IE_DESC = 'NRK Skole'      _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)' | 
