diff options
| author | Sergey M․ <dstftw@gmail.com> | 2018-12-07 00:49:24 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2018-12-07 00:49:24 +0700 | 
| commit | 15699ec8b0a9cb8d519b721a5cb8199afe62fc3c (patch) | |
| tree | 535da247b8fba92598d2f13a030fafb311f8e6df | |
| parent | 33cc1ea586480ccc60fd25f2d42cfb44eec605c6 (diff) | |
[nrktv:season,series] Fix extraction and update tests (closes #17159, closes #17258)
| -rw-r--r-- | youtube_dl/extractor/nrk.py | 68 | 
1 files changed, 43 insertions, 25 deletions
| diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index c5001ef48..48bc6fd7a 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -211,13 +211,13 @@ class NRKIE(NRKBaseIE):      _TESTS = [{          # video          'url': 'http://www.nrk.no/video/PS*150533', -        'md5': '2f7f6eeb2aacdd99885f355428715cfa', +        'md5': '706f34cdf1322577589e369e522b50ef',          'info_dict': {              'id': '150533',              'ext': 'mp4',              'title': 'Dompap og andre fugler i Piip-Show',              'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', -            'duration': 263, +            'duration': 262,          }      }, {          # audio @@ -256,14 +256,14 @@ class NRKTVIE(NRKBaseIE):      _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')      _TESTS = [{          'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', -        'md5': '4e9ca6629f09e588ed240fb11619922a', +        'md5': '9a167e54d04671eb6317a37b7bc8a280',          'info_dict': {              'id': 'MUHH48000314AA',              'ext': 'mp4',              'title': '20 spørsmål 23.05.2014',              'description': 'md5:bdea103bc35494c143c6a9acdd84887a',              'duration': 1741, -            'series': '20 spørsmål - TV', +            'series': '20 spørsmål',              'episode': '23.05.2014',          },      }, { @@ -301,7 +301,7 @@ class NRKTVIE(NRKBaseIE):                  'id': 'MSPO40010515AH',                  'ext': 'mp4',                  'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)', -                'description': 'md5:c03aba1e917561eface5214020551b7a', +                'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',                  'duration': 772,                  'series': 'Tour de Ski',                  'episode': '06.01.2015', @@ -314,7 +314,7 @@ class NRKTVIE(NRKBaseIE):                  'id': 'MSPO40010515BH',                  'ext': 'mp4',                  'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)', -                'description': 'md5:c03aba1e917561eface5214020551b7a', +                'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',                  'duration': 6175,                  'series': 'Tour de Ski',                  'episode': '06.01.2015', @@ -326,7 +326,7 @@ class NRKTVIE(NRKBaseIE):          'info_dict': {              'id': 'MSPO40010515',              'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015', -            'description': 'md5:c03aba1e917561eface5214020551b7a', +            'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',          },          'expected_warnings': ['Video is geo restricted'],      }, { @@ -406,21 +406,35 @@ class NRKTVSerieBaseIE(InfoExtractor):      def _extract_series(self, webpage, display_id, fatal=True):          config = self._parse_json(              self._search_regex( -                r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config', -                default='{}' if not fatal else NO_DEFAULT), +                (r'INITIAL_DATA_*\s*=\s*({.+?})\s*;', +                 r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'), +                webpage, 'config', default='{}' if not fatal else NO_DEFAULT),              display_id, fatal=False)          if not config:              return -        return try_get(config, lambda x: x['series'], dict) +        return try_get( +            config, +            (lambda x: x['initialState']['series'], lambda x: x['series']), +            dict) + +    def _extract_seasons(self, seasons): +        if not isinstance(seasons, list): +            return [] +        entries = [] +        for season in seasons: +            entries.extend(self._extract_episodes(season)) +        return entries      def _extract_episodes(self, season): -        entries = []          if not isinstance(season, dict): -            return entries -        episodes = season.get('episodes') -        if not isinstance(episodes, list): -            return entries -        for episode in episodes: +            return [] +        return self._extract_entries(season.get('episodes')) + +    def _extract_entries(self, entry_list): +        if not isinstance(entry_list, list): +            return [] +        entries = [] +        for episode in entry_list:              nrk_id = episode.get('prfId')              if not nrk_id or not isinstance(nrk_id, compat_str):                  continue @@ -465,7 +479,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):      _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'      _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'      _TESTS = [{ -        # new layout +        # new layout, seasons          'url': 'https://tv.nrk.no/serie/backstage',          'info_dict': {              'id': 'backstage', @@ -474,20 +488,21 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):          },          'playlist_mincount': 60,      }, { -        # old layout +        # new layout, instalments          'url': 'https://tv.nrk.no/serie/groenn-glede',          'info_dict': {              'id': 'groenn-glede',              'title': 'Grønn glede',              'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',          }, -        'playlist_mincount': 9, +        'playlist_mincount': 10,      }, { -        'url': 'http://tv.nrksuper.no/serie/labyrint', +        # old layout +        'url': 'https://tv.nrksuper.no/serie/labyrint',          'info_dict': {              'id': 'labyrint',              'title': 'Labyrint', -            'description': 'md5:58afd450974c89e27d5a19212eee7115', +            'description': 'md5:318b597330fdac5959247c9b69fdb1ec',          },          'playlist_mincount': 3,      }, { @@ -520,11 +535,11 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):              description = try_get(                  series, lambda x: x['titles']['subtitle'], compat_str)              entries = [] -            for season in series['seasons']: -                entries.extend(self._extract_episodes(season)) +            entries.extend(self._extract_seasons(series.get('seasons'))) +            entries.extend(self._extract_entries(series.get('instalments')))              return self.playlist_result(entries, series_id, title, description) -        # Old layout (e.g. https://tv.nrk.no/serie/groenn-glede) +        # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)          entries = [              self.url_result(                  'https://tv.nrk.no/program/Episodes/{series}/{season}'.format( @@ -536,6 +551,9 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):              'seriestitle', webpage,              'title', default=None) or self._og_search_title(              webpage, fatal=False) +        if title: +            title = self._search_regex( +                r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)          description = self._html_search_meta(              'series_description', webpage, @@ -596,7 +614,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):              'title': 'Rivertonprisen til Karin Fossum',              'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',          }, -        'playlist_count': 5, +        'playlist_count': 2,      }]      def _extract_title(self, webpage): | 
