diff options
| author | Remita Amine <remitamine@gmail.com> | 2021-04-07 09:27:05 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2021-04-07 09:27:05 +0100 | 
| commit | c0c5134c5771dd2a1caeeaee62dcd207d169e981 (patch) | |
| tree | a0d0f7a03fb5014364cd6c47404868f6abfcbfd4 | |
| parent | 72a2c0a9ede04c6b82235e453b1a933faf072a76 (diff) | |
[curiositystream] fix format extraction(closes #26845, closes #28668)
| -rw-r--r-- | youtube_dl/extractor/curiositystream.py | 103 | 
1 files changed, 58 insertions, 45 deletions
| diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py index e4a7fca6c..ae64a07d7 100644 --- a/youtube_dl/extractor/curiositystream.py +++ b/youtube_dl/extractor/curiositystream.py @@ -25,12 +25,12 @@ class CuriosityStreamBaseIE(InfoExtractor):              raise ExtractorError(                  '%s said: %s' % (self.IE_NAME, error), expected=True) -    def _call_api(self, path, video_id): +    def _call_api(self, path, video_id, query=None):          headers = {}          if self._auth_token:              headers['X-Auth-Token'] = self._auth_token          result = self._download_json( -            self._API_BASE_URL + path, video_id, headers=headers) +            self._API_BASE_URL + path, video_id, headers=headers, query=query)          self._handle_errors(result)          return result['data'] @@ -52,62 +52,75 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):      _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'      _TEST = {          'url': 'https://app.curiositystream.com/video/2', -        'md5': '262bb2f257ff301115f1973540de8983',          'info_dict': {              'id': '2',              'ext': 'mp4',              'title': 'How Did You Develop The Internet?',              'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', -        } +        }, +        'params': { +            'format': 'bestvideo', +            # m3u8 download +            'skip_download': True, +        },      }      def _real_extract(self, url):          video_id = self._match_id(url) -        media = self._call_api('media/' + video_id, video_id) -        title = media['title']          formats = [] -        for encoding in media.get('encodings', []): -            m3u8_url = encoding.get('master_playlist_url') -            if m3u8_url: -                formats.extend(self._extract_m3u8_formats( -                    m3u8_url, video_id, 'mp4', 'm3u8_native', -                    m3u8_id='hls', fatal=False)) -            encoding_url = encoding.get('url') -            file_url = encoding.get('file_url') -            if not encoding_url and not file_url: -                continue -            f = { -                'width': int_or_none(encoding.get('width')), -                'height': int_or_none(encoding.get('height')), -                'vbr': int_or_none(encoding.get('video_bitrate')), -                'abr': int_or_none(encoding.get('audio_bitrate')), -                'filesize': int_or_none(encoding.get('size_in_bytes')), -                'vcodec': encoding.get('video_codec'), -                'acodec': encoding.get('audio_codec'), -                'container': encoding.get('container_type'), -            } -            for f_url in (encoding_url, file_url): -                if not f_url: +        for encoding_format in ('m3u8', 'mpd'): +            media = self._call_api('media/' + video_id, video_id, query={ +                'encodingsNew': 'true', +                'encodingsFormat': encoding_format, +            }) +            for encoding in media.get('encodings', []): +                playlist_url = encoding.get('master_playlist_url') +                if encoding_format == 'm3u8': +                    # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol +                    formats.extend(self._extract_m3u8_formats( +                        playlist_url, video_id, 'mp4', +                        m3u8_id='hls', fatal=False)) +                elif encoding_format == 'mpd': +                    formats.extend(self._extract_mpd_formats( +                        playlist_url, video_id, mpd_id='dash', fatal=False)) +                encoding_url = encoding.get('url') +                file_url = encoding.get('file_url') +                if not encoding_url and not file_url:                      continue -                fmt = f.copy() -                rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url) -                if rtmp: -                    fmt.update({ -                        'url': rtmp.group('url'), -                        'play_path': rtmp.group('playpath'), -                        'app': rtmp.group('app'), -                        'ext': 'flv', -                        'format_id': 'rtmp', -                    }) -                else: -                    fmt.update({ -                        'url': f_url, -                        'format_id': 'http', -                    }) -                formats.append(fmt) +                f = { +                    'width': int_or_none(encoding.get('width')), +                    'height': int_or_none(encoding.get('height')), +                    'vbr': int_or_none(encoding.get('video_bitrate')), +                    'abr': int_or_none(encoding.get('audio_bitrate')), +                    'filesize': int_or_none(encoding.get('size_in_bytes')), +                    'vcodec': encoding.get('video_codec'), +                    'acodec': encoding.get('audio_codec'), +                    'container': encoding.get('container_type'), +                } +                for f_url in (encoding_url, file_url): +                    if not f_url: +                        continue +                    fmt = f.copy() +                    rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url) +                    if rtmp: +                        fmt.update({ +                            'url': rtmp.group('url'), +                            'play_path': rtmp.group('playpath'), +                            'app': rtmp.group('app'), +                            'ext': 'flv', +                            'format_id': 'rtmp', +                        }) +                    else: +                        fmt.update({ +                            'url': f_url, +                            'format_id': 'http', +                        }) +                    formats.append(fmt)          self._sort_formats(formats) +        title = media['title'] +          subtitles = {}          for closed_caption in media.get('closed_captions', []):              sub_url = closed_caption.get('file') @@ -140,7 +153,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):              'title': 'Curious Minds: The Internet',              'description': 'How is the internet shaping our lives in the 21st Century?',          }, -        'playlist_mincount': 17, +        'playlist_mincount': 16,      }, {          'url': 'https://curiositystream.com/series/2',          'only_matching': True, | 
