diff options
Diffstat (limited to 'youtube_dl/extractor/sportdeutschland.py')
| -rw-r--r-- | youtube_dl/extractor/sportdeutschland.py | 157 | 
1 files changed, 81 insertions, 76 deletions
diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py index a3c35a899..3e497a939 100644 --- a/youtube_dl/extractor/sportdeutschland.py +++ b/youtube_dl/extractor/sportdeutschland.py @@ -1,100 +1,105 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor +from ..compat import ( +    compat_parse_qs, +    compat_urllib_parse_urlparse, +)  from ..utils import ( +    clean_html, +    float_or_none, +    int_or_none,      parse_iso8601, -    sanitized_Request, +    strip_or_none, +    try_get,  )  class SportDeutschlandIE(InfoExtractor): -    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])' +    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'      _TESTS = [{ -        'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen', +        'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',          'info_dict': { -            'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen', +            'id': '5318cac0275701382770543d7edaf0a0',              'ext': 'mp4', -            'title': 're:Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen', -            'categories': ['Badminton'], -            'view_count': int, -            'thumbnail': r're:^https?://.*\.jpg$', -            'description': r're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV', -            'timestamp': int, -            'upload_date': 're:^201408[23][0-9]$', +            'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1', +            'duration': 16106.36,          },          'params': { -            'skip_download': 'Live stream', +            'noplaylist': True, +            # m3u8 download +            'skip_download': True,          },      }, { -        'url': 'http://sportdeutschland.tv/li-ning-badminton-wm-2014/lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs', +        'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',          'info_dict': { -            'id': 'lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs', -            'ext': 'mp4', -            'upload_date': '20140825', -            'description': 'md5:60a20536b57cee7d9a4ec005e8687504', -            'timestamp': 1408976060, -            'duration': 2732, -            'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee', -            'thumbnail': r're:^https?://.*\.jpg$', -            'view_count': int, -            'categories': ['Li-Ning Badminton WM 2014'], - -        } +            'id': 'c6e2fdd01f63013854c47054d2ab776f', +            'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals', +            'description': 'md5:5263ff4c31c04bb780c9f91130b48530', +            'duration': 31397, +        }, +        'playlist_count': 2, +    }, { +        'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich', +        'only_matching': True,      }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') -        sport_id = mobj.group('sport') - -        api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % ( -            sport_id, video_id) -        req = sanitized_Request(api_url, headers={ -            'Accept': 'application/vnd.vidibus.v2.html+json', -            'Referer': url, -        }) -        data = self._download_json(req, video_id) - +        display_id = self._match_id(url) +        data = self._download_json( +            'https://backend.sportdeutschland.tv/api/permalinks/' + display_id, +            display_id, query={'access_token': 'true'})          asset = data['asset'] -        categories = [data['section']['title']] - -        formats = [] -        smil_url = asset['video'] -        if '.smil' in smil_url: -            m3u8_url = smil_url.replace('.smil', '.m3u8') -            formats.extend( -                self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')) +        title = (asset.get('title') or asset['label']).strip() +        asset_id = asset.get('id') or asset.get('uuid') +        info = { +            'id': asset_id, +            'title': title, +            'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'), +            'duration': int_or_none(asset.get('seconds')), +        } +        videos = asset.get('videos') or [] +        if len(videos) > 1: +            playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0] +            if playlist_id: +                if self._downloader.params.get('noplaylist'): +                    videos = [videos[int(playlist_id)]] +                    self.to_screen('Downloading just a single video because of --no-playlist') +                else: +                    self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id) -            smil_doc = self._download_xml( -                smil_url, video_id, note='Downloading SMIL metadata') -            base_url_el = smil_doc.find('./head/meta') -            if base_url_el: -                base_url = base_url_el.attrib['base'] -            formats.extend([{ -                'format_id': 'rmtp', -                'url': base_url if base_url_el else n.attrib['src'], -                'play_path': n.attrib['src'], -                'ext': 'flv', -                'preference': -100, -                'format_note': 'Seems to fail at example stream', -            } for n in smil_doc.findall('./body/video')]) +            def entries(): +                for i, video in enumerate(videos, 1): +                    video_id = video.get('uuid') +                    video_url = video.get('url') +                    if not (video_id and video_url): +                        continue +                    formats = self._extract_m3u8_formats( +                        video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False) +                    if not formats: +                        continue +                    yield { +                        'id': video_id, +                        'formats': formats, +                        'title': title + ' - ' + (video.get('label') or 'Teil %d' % i), +                        'duration': float_or_none(video.get('duration')), +                    } +            info.update({ +                '_type': 'multi_video', +                'entries': entries(), +            })          else: -            formats.append({'url': smil_url}) - -        self._sort_formats(formats) - -        return { -            'id': video_id, -            'formats': formats, -            'title': asset['title'], -            'thumbnail': asset.get('image'), -            'description': asset.get('teaser'), -            'duration': asset.get('duration'), -            'categories': categories, -            'view_count': asset.get('views'), -            'rtmp_live': asset.get('live'), -            'timestamp': parse_iso8601(asset.get('date')), -        } +            formats = self._extract_m3u8_formats( +                videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4') +            section_title = strip_or_none(try_get(data, lambda x: x['section']['title'])) +            info.update({ +                'formats': formats, +                'display_id': asset.get('permalink'), +                'thumbnail': try_get(asset, lambda x: x['images'][0]), +                'categories': [section_title] if section_title else None, +                'view_count': int_or_none(asset.get('views')), +                'is_live': asset.get('is_live') is True, +                'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')), +            }) +        return info  | 
