diff options
| author | Remita Amine <remitamine@gmail.com> | 2020-12-04 18:04:38 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2020-12-04 18:04:38 +0100 | 
| commit | 5e95e18ce949e759e8e26de76c386c44e50b2abd (patch) | |
| tree | b952756b5a0f1d675923acdfeb2da1b9836d4003 | |
| parent | e91df0c5501bd5df9987e310a37df51129ee1cf2 (diff) | |
[nrk] improve format extraction and geo-restriction detection (closes #24221)
| -rw-r--r-- | youtube_dl/extractor/nrk.py | 43 | 
1 files changed, 21 insertions, 22 deletions
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 0c4b126ed..19d820f61 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -24,6 +24,11 @@ from ..utils import (  class NRKBaseIE(InfoExtractor):      _GEO_COUNTRIES = ['NO'] +    def _extract_nrk_formats(self, asset_url, video_id): +        return self._extract_m3u8_formats( +            re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url), +            video_id, 'mp4', 'm3u8_native', fatal=False) +  class NRKIE(NRKBaseIE):      _VALID_URL = r'''(?x) @@ -94,9 +99,7 @@ class NRKIE(NRKBaseIE):              if not format_url:                  continue              if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8': -                formats.extend(self._extract_m3u8_formats( -                    format_url, video_id, 'mp4', entry_protocol='m3u8_native', -                    m3u8_id='hls', fatal=False)) +                formats.extend(self._extract_nrk_formats(format_url, video_id))          self._sort_formats(formats)          data = self._download_json( @@ -298,6 +301,7 @@ class NRKTVIE(NRKBaseIE):          title = data.get('fullTitle') or data.get('mainTitle') or data['title']          video_id = data.get('id') or video_id +        urls = []          entries = []          conviva = data.get('convivaStatistics') or {} @@ -314,19 +318,13 @@ class NRKTVIE(NRKBaseIE):                          else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))              for num, asset in enumerate(media_assets, 1):                  asset_url = asset.get('url') -                if not asset_url: +                if not asset_url or asset_url in urls:                      continue -                formats = self._extract_akamai_formats(asset_url, video_id) +                formats = self._extract_nrk_formats(asset_url, video_id)                  if not formats:                      continue                  self._sort_formats(formats) -            
    # Some f4m streams may not work with hdcore in fragments' URLs -                for f in formats: -                    extra_param = f.get('extra_param_to_segment_url') -                    if extra_param and 'hdcore' in extra_param: -                        del f['extra_param_to_segment_url'] -                  entry_id, entry_title = video_id_and_title(num)                  duration = parse_duration(asset.get('duration'))                  subtitles = {} @@ -346,16 +344,17 @@ class NRKTVIE(NRKBaseIE):          if not entries:              media_url = data.get('mediaUrl') -            if media_url: -                formats = self._extract_akamai_formats(media_url, video_id) -                self._sort_formats(formats) -                duration = parse_duration(data.get('duration')) -                entries = [{ -                    'id': video_id, -                    'title': make_title(title), -                    'duration': duration, -                    'formats': formats, -                }] +            if media_url and media_url not in urls: +                formats = self._extract_nrk_formats(media_url, video_id) +                if formats: +                    self._sort_formats(formats) +                    duration = parse_duration(data.get('duration')) +                    entries = [{ +                        'id': video_id, +                        'title': make_title(title), +                        'duration': duration, +                        'formats': formats, +                    }]          if not entries:              MESSAGES = { @@ -366,7 +365,7 @@ class NRKTVIE(NRKBaseIE):              }              message_type = data.get('messageType', '')              # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* -            if 'IsGeoBlocked' in message_type: +            if 'IsGeoBlocked' in message_type or try_get(data, lambda x: x['usageRights']['isGeoBlocked']) is True:                  self.raise_geo_restricted(                      
msg=MESSAGES.get('ProgramIsGeoBlocked'),                      countries=self._GEO_COUNTRIES)  | 
