diff options
Diffstat (limited to 'youtube_dl/extractor/tv2.py')
| -rw-r--r-- | youtube_dl/extractor/tv2.py | 173 | 
1 files changed, 138 insertions, 35 deletions
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index d5071e8a5..334b7d540 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -4,23 +4,27 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import compat_HTTPError  from ..utils import (      determine_ext, +    ExtractorError,      int_or_none,      float_or_none,      js_to_json,      parse_iso8601,      remove_end, +    strip_or_none, +    try_get,  )  class TV2IE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)' -    _TEST = { +    _TESTS = [{          'url': 'http://www.tv2.no/v/916509/',          'info_dict': {              'id': '916509', -            'ext': 'mp4', +            'ext': 'flv',              'title': 'Se Frode Gryttens hyllest av Steven Gerrard',              'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',              'timestamp': 1431715610, @@ -29,22 +33,46 @@ class TV2IE(InfoExtractor):              'view_count': int,              'categories': list,          }, -        'params': { -            # m3u8 download -            'skip_download': True, -        }, -    } +    }] +    _API_DOMAIN = 'sumo.tv2.no' +    _PROTOCOLS = ('HDS', 'HLS', 'DASH') +    _GEO_COUNTRIES = ['NO']      def _real_extract(self, url):          video_id = self._match_id(url) +        api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id) + +        asset = self._download_json( +            api_base + '.json', video_id, +            'Downloading metadata JSON')['asset'] +        title = asset.get('subtitle') or asset['title'] +        is_live = asset.get('live') is True          formats = []          format_urls = [] -        for protocol in ('HDS', 'HLS'): -            data = self._download_json( -                'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol), -                video_id, 'Downloading play JSON')['playback'] -            for item in data['items']['item']: +        for protocol in self._PROTOCOLS: +            try: +                data = self._download_json( +                    api_base + '/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % protocol, +                    video_id, 'Downloading play JSON')['playback'] +            except ExtractorError as e: +                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: +                    error = self._parse_json(e.cause.read().decode(), video_id)['error'] +                    error_code = error.get('code') +                    if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION': +                        self.raise_geo_restricted(countries=self._GEO_COUNTRIES) +                    elif error_code == 'SESSION_NOT_AUTHENTICATED': +                        self.raise_login_required() +                    raise ExtractorError(error['description']) +                raise +            items = try_get(data, lambda x: x['items']['item']) +            if not items: +                continue +            if not isinstance(items, list): +                items = [items] +            for item in items: +                if not isinstance(item, dict): +                    continue                  video_url = item.get('url')                  if not video_url or video_url in format_urls:                      continue @@ -57,9 +85,14 @@ class TV2IE(InfoExtractor):                      formats.extend(self._extract_f4m_formats(                          video_url, video_id, f4m_id=format_id, fatal=False))                  elif ext == 'm3u8': -                    formats.extend(self._extract_m3u8_formats( -                        video_url, video_id, 'mp4', entry_protocol='m3u8_native', -                        m3u8_id=format_id, fatal=False)) +                    if not data.get('drmProtected'): +                        formats.extend(self._extract_m3u8_formats( +                            video_url, video_id, 'mp4', +                            'm3u8' if is_live else 'm3u8_native', +                            m3u8_id=format_id, fatal=False)) +                elif ext == 'mpd': +                    formats.extend(self._extract_mpd_formats( +                        video_url, video_id, format_id, fatal=False))                  elif ext == 'ism' or video_url.endswith('.ism/Manifest'):                      pass                  else: @@ -69,35 +102,27 @@ class TV2IE(InfoExtractor):                          'tbr': int_or_none(item.get('bitrate')),                          'filesize': int_or_none(item.get('fileSize')),                      }) +        if not formats and data.get('drmProtected'): +            raise ExtractorError('This video is DRM protected.', expected=True)          self._sort_formats(formats) -        asset = self._download_json( -            'http://sumo.tv2.no/api/web/asset/%s.json' % video_id, -            video_id, 'Downloading metadata JSON')['asset'] - -        title = asset['title'] -        description = asset.get('description') -        timestamp = parse_iso8601(asset.get('createTime')) -        duration = float_or_none(asset.get('accurateDuration') or asset.get('duration')) -        view_count = int_or_none(asset.get('views')) -        categories = asset.get('keywords', '').split(',') -          thumbnails = [{              'id': thumbnail.get('@type'),              'url': thumbnail.get('url'), -        } for _, thumbnail in asset.get('imageVersions', {}).items()] +        } for _, thumbnail in (asset.get('imageVersions') or {}).items()]          return {              'id': video_id,              'url': video_url, -            'title': title, -            'description': description, +            'title': self._live_title(title) if is_live else title, +            'description': strip_or_none(asset.get('description')),              'thumbnails': thumbnails, -            'timestamp': timestamp, -            'duration': duration, -            'view_count': view_count, -            'categories': categories, +            'timestamp': parse_iso8601(asset.get('createTime')), +            'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')), +            'view_count': int_or_none(asset.get('views')), +            'categories': asset.get('keywords', '').split(','),              'formats': formats, +            'is_live': is_live,          } @@ -108,7 +133,7 @@ class TV2ArticleIE(InfoExtractor):          'info_dict': {              'id': '6930542',              'title': 'Russen hetses etter pingvintyveri - innrømmer å ha åpnet luken på buret', -            'description': 'md5:339573779d3eea3542ffe12006190954', +            'description': 'De fire siktede nekter fortsatt for å ha stjålet pingvinbabyene, men innrømmer å ha åpnet luken til de små kyllingene.',          },          'playlist_count': 2,      }, { @@ -126,7 +151,7 @@ class TV2ArticleIE(InfoExtractor):          if not assets:              # New embed pattern -            for v in re.findall(r'TV2ContentboxVideo\(({.+?})\)', webpage): +            for v in re.findall(r'(?s)TV2ContentboxVideo\(({.+?})\)', webpage):                  video = self._parse_json(                      v, playlist_id, transform_source=js_to_json, fatal=False)                  if not video: @@ -143,3 +168,81 @@ class TV2ArticleIE(InfoExtractor):          description = remove_end(self._og_search_description(webpage), ' - TV2.no')          return self.playlist_result(entries, playlist_id, title, description) + + +class KatsomoIE(TV2IE): +    _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)' +    _TESTS = [{ +        'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321', +        'info_dict': { +            'id': '1181321', +            'ext': 'mp4', +            'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle', +            'description': 'Päätöksen teki Pelicansin hallitus.', +            'timestamp': 1575116484, +            'upload_date': '20191130', +            'duration': 37.12, +            'view_count': int, +            'categories': list, +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +    }, { +        'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa', +        'only_matching': True, +    }, { +        'url': 'https://www.mtvuutiset.fi/video/prog1311159', +        'only_matching': True, +    }, { +        'url': 'https://www.katsomo.fi/#!/jakso/1311159', +        'only_matching': True, +    }] +    _API_DOMAIN = 'api.katsomo.fi' +    _PROTOCOLS = ('HLS', 'MPD') +    _GEO_COUNTRIES = ['FI'] + + +class MTVUutisetArticleIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)' +    _TESTS = [{ +        'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384', +        'info_dict': { +            'id': '1311159', +            'ext': 'mp4', +            'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla', +            'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla', +            'timestamp': 1600608966, +            'upload_date': '20200920', +            'duration': 153.7886666, +            'view_count': int, +            'categories': list, +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +    }, { +        # multiple Youtube embeds +        'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        article_id = self._match_id(url) +        article = self._download_json( +            'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id, +            article_id) + +        def entries(): +            for video in (article.get('videos') or []): +                video_type = video.get('videotype') +                video_url = video.get('url') +                if not (video_url and video_type in ('katsomo', 'youtube')): +                    continue +                yield self.url_result( +                    video_url, video_type.capitalize(), video.get('video_id')) + +        return self.playlist_result( +            entries(), article_id, article.get('title'), article.get('description'))  | 
