diff options
| -rw-r--r-- | youtube_dl/extractor/nrk.py | 273 | 
1 files changed, 57 insertions, 216 deletions
| diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index b545f291b..871e4845c 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -6,15 +6,11 @@ import random  import re  from .common import InfoExtractor -from ..compat import ( -    compat_str, -    compat_urllib_parse_unquote, -) +from ..compat import compat_str  from ..utils import (      determine_ext,      ExtractorError,      int_or_none, -    parse_age_limit,      parse_duration,      try_get,      urljoin, @@ -63,60 +59,8 @@ class NRKBaseIE(InfoExtractor):          return self._download_json(              urljoin('http://psapi.nrk.no/', path),              video_id, note or 'Downloading %s JSON' % item, -            fatal=fatal, query=query) - - -class NRKIE(NRKBaseIE): -    _VALID_URL = r'''(?x) -                        (?: -                            nrk:| -                            https?:// -                                (?: -                                    (?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)| -                                    v8[-.]psapi\.nrk\.no/mediaelement/ -                                ) -                            ) -                            (?P<id>[^?\#&]+) -                        ''' - -    _TESTS = [{ -        # video -        'url': 'http://www.nrk.no/video/PS*150533', -        'md5': 'f46be075326e23ad0e524edfcb06aeb6', -        'info_dict': { -            'id': '150533', -            'ext': 'mp4', -            'title': 'Dompap og andre fugler i Piip-Show', -            'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', -            'duration': 262, -        } -    }, { -        # audio -        'url': 'http://www.nrk.no/video/PS*154915', -        # MD5 is unstable -        'info_dict': { -            'id': '154915', -            'ext': 'mp4', -            'title': 'Slik høres internett ut når du er blind', -            'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', -            'duration': 20, -        } -    }, { -        'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9', -        'only_matching': True, -    }, { -        'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70', -        'only_matching': True, -    }, { -        'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9', -        'only_matching': True, -    }, { -        'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533', -        'only_matching': True, -    }, { -        'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999', -        'only_matching': True, -    }] +            fatal=fatal, query=query, +            headers={'Accept-Encoding': 'gzip, deflate, br'})      def _extract_from_playback(self, video_id):          path_templ = 'playback/%s/' + video_id @@ -178,6 +122,59 @@ class NRKIE(NRKBaseIE):              'formats': formats,          } + +class NRKIE(NRKBaseIE): +    _VALID_URL = r'''(?x) +                        (?: +                            nrk:| +                            https?:// +                                (?: +                                    (?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)| +                                    v8[-.]psapi\.nrk\.no/mediaelement/ +                                ) +                            ) +                            (?P<id>[^?\#&]+) +                        ''' + +    _TESTS = [{ +        # video +        'url': 'http://www.nrk.no/video/PS*150533', +        'md5': 'f46be075326e23ad0e524edfcb06aeb6', +        'info_dict': { +            'id': '150533', +            'ext': 'mp4', +            'title': 'Dompap og andre fugler i Piip-Show', +            'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', +            'duration': 262, +        } +    }, { +        # audio +        'url': 'http://www.nrk.no/video/PS*154915', +        # MD5 is unstable +        'info_dict': { +            'id': '154915', +            'ext': 'mp4', +            'title': 'Slik høres internett ut når du er blind', +            'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', +            'duration': 20, +        } +    }, { +        'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9', +        'only_matching': True, +    }, { +        'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70', +        'only_matching': True, +    }, { +        'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9', +        'only_matching': True, +    }, { +        'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533', +        'only_matching': True, +    }, { +        'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999', +        'only_matching': True, +    }] +      def _real_extract(self, url):          video_id = self._match_id(url)          return self._extract_from_playback(video_id) @@ -187,7 +184,6 @@ class NRKTVIE(NRKBaseIE):      IE_DESC = 'NRK TV and NRK Radio'      _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'      _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE -    _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')      _TESTS = [{          'url': 'https://tv.nrk.no/program/MDDP12000117',          'md5': 'c4a5960f1b00b40d47db65c1064e0ab1', @@ -290,164 +286,9 @@ class NRKTVIE(NRKBaseIE):          'only_matching': True,      }] -    _api_host = None - -    def _extract_from_mediaelement(self, video_id): -        api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS - -        for api_host in api_hosts: -            data = self._download_json( -                'http://%s/mediaelement/%s' % (api_host, video_id), -                video_id, 'Downloading mediaelement JSON', -                fatal=api_host == api_hosts[-1]) -            if not data: -                continue -            self._api_host = api_host -            break - -        title = data.get('fullTitle') or data.get('mainTitle') or data['title'] -        video_id = data.get('id') or video_id - -        urls = [] -        entries = [] - -        conviva = data.get('convivaStatistics') or {} -        live = (data.get('mediaElementType') == 'Live' -                or data.get('isLive') is True or conviva.get('isLive')) - -        def make_title(t): -            return self._live_title(t) if live else t - -        media_assets = data.get('mediaAssets') -        if media_assets and isinstance(media_assets, list): -            def video_id_and_title(idx): -                return ((video_id, title) if len(media_assets) == 1 -                        else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx))) -            for num, asset in enumerate(media_assets, 1): -                asset_url = asset.get('url') -                if not asset_url or asset_url in urls: -                    continue -                urls.append(asset_url) -                formats = self._extract_nrk_formats(asset_url, video_id) -                if not formats: -                    continue -                self._sort_formats(formats) - -                entry_id, entry_title = video_id_and_title(num) -                duration = parse_duration(asset.get('duration')) -                subtitles = {} -                for subtitle in ('webVtt', 'timedText'): -                    subtitle_url = asset.get('%sSubtitlesUrl' % subtitle) -                    if subtitle_url: -                        subtitles.setdefault('no', []).append({ -                            'url': compat_urllib_parse_unquote(subtitle_url) -                        }) -                entries.append({ -                    'id': asset.get('carrierId') or entry_id, -                    'title': make_title(entry_title), -                    'duration': duration, -                    'subtitles': subtitles, -                    'formats': formats, -                    'is_live': live, -                }) - -        if not entries: -            media_url = data.get('mediaUrl') -            if media_url and media_url not in urls: -                formats = self._extract_nrk_formats(media_url, video_id) -                if formats: -                    self._sort_formats(formats) -                    duration = parse_duration(data.get('duration')) -                    entries = [{ -                        'id': video_id, -                        'title': make_title(title), -                        'duration': duration, -                        'formats': formats, -                        'is_live': live, -                    }] - -        if not entries: -            self._raise_error(data) - -        series = conviva.get('seriesName') or data.get('seriesTitle') -        episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') - -        season_number = None -        episode_number = None -        if data.get('mediaElementType') == 'Episode': -            _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \ -                data.get('relativeOriginUrl', '') -            EPISODENUM_RE = [ -                r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.', -                r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})', -            ] -            season_number = int_or_none(self._search_regex( -                EPISODENUM_RE, _season_episode, 'season number', -                default=None, group='season')) -            episode_number = int_or_none(self._search_regex( -                EPISODENUM_RE, _season_episode, 'episode number', -                default=None, group='episode')) - -        thumbnails = None -        images = data.get('images') -        if images and isinstance(images, dict): -            web_images = images.get('webImages') -            if isinstance(web_images, list): -                thumbnails = [{ -                    'url': image['imageUrl'], -                    'width': int_or_none(image.get('width')), -                    'height': int_or_none(image.get('height')), -                } for image in web_images if image.get('imageUrl')] - -        description = data.get('description') -        category = data.get('mediaAnalytics', {}).get('category') - -        common_info = { -            'description': description, -            'series': series, -            'episode': episode, -            'season_number': season_number, -            'episode_number': episode_number, -            'categories': [category] if category else None, -            'age_limit': parse_age_limit(data.get('legalAge')), -            'thumbnails': thumbnails, -        } - -        vcodec = 'none' if data.get('mediaType') == 'Audio' else None - -        for entry in entries: -            entry.update(common_info) -            for f in entry['formats']: -                f['vcodec'] = vcodec - -        points = data.get('shortIndexPoints') -        if isinstance(points, list): -            chapters = [] -            for next_num, point in enumerate(points, start=1): -                if not isinstance(point, dict): -                    continue -                start_time = parse_duration(point.get('startPoint')) -                if start_time is None: -                    continue -                end_time = parse_duration( -                    data.get('duration') -                    if next_num == len(points) -                    else points[next_num].get('startPoint')) -                if end_time is None: -                    continue -                chapters.append({ -                    'start_time': start_time, -                    'end_time': end_time, -                    'title': point.get('title'), -                }) -            if chapters and len(entries) == 1: -                entries[0]['chapters'] = chapters - -        return self.playlist_result(entries, video_id, title, description) -      def _real_extract(self, url):          video_id = self._match_id(url) -        return self._extract_from_mediaelement(video_id) +        return self._extract_from_playback(video_id)  class NRKTVEpisodeIE(InfoExtractor): | 
