diff options
| author | Remita Amine <remitamine@gmail.com> | 2021-03-13 15:19:24 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2021-03-13 15:19:24 +0100 | 
| commit | 60845121ca2f49172e7cd941c0cb43363cb86e46 (patch) | |
| tree | 6222b712561e33cc229f7c13753249c44b5028b1 | |
| parent | 1182f9567b86f2af747cdb8769ab87649c8ce4c2 (diff) | |
[sportdeutschland] fix extraction(closes #21856)(closes #28425)
| -rw-r--r-- | youtube_dl/extractor/sportdeutschland.py | 145 | 
1 file changed, 84 insertions, 61 deletions
```diff
diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py
index 378fc7568..3e497a939 100644
--- a/youtube_dl/extractor/sportdeutschland.py
+++ b/youtube_dl/extractor/sportdeutschland.py
@@ -1,82 +1,105 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
+    clean_html,
+    float_or_none,
+    int_or_none,
     parse_iso8601,
-    sanitized_Request,
+    strip_or_none,
+    try_get,
 )
 
 
 class SportDeutschlandIE(InfoExtractor):
-    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
+    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
     _TESTS = [{
         'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
         'info_dict': {
-            'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
+            'id': '5318cac0275701382770543d7edaf0a0',
             'ext': 'mp4',
-            'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
-            'categories': ['Badminton-Deutschland'],
-            'view_count': int,
-            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
-            'timestamp': int,
-            'upload_date': '20200201',
-            'description': 're:.*',  # meaningless description for THIS video
+            'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
+            'duration': 16106.36,
         },
+        'params': {
+            'noplaylist': True,
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
+        'info_dict': {
+            'id': 'c6e2fdd01f63013854c47054d2ab776f',
+            'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
+            'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
+            'duration': 31397,
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        sport_id = mobj.group('sport')
-
-        api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
-            sport_id, video_id)
-        req = sanitized_Request(api_url, headers={
-            'Accept': 'application/vnd.vidibus.v2.html+json',
-            'Referer': url,
-        })
-        data = self._download_json(req, video_id)
-
+        display_id = self._match_id(url)
+        data = self._download_json(
+            'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
+            display_id, query={'access_token': 'true'})
         asset = data['asset']
-        categories = [data['section']['title']]
-
-        formats = []
-        smil_url = asset['video']
-        if '.smil' in smil_url:
-            m3u8_url = smil_url.replace('.smil', '.m3u8')
-            formats.extend(
-                self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
+        title = (asset.get('title') or asset['label']).strip()
+        asset_id = asset.get('id') or asset.get('uuid')
+        info = {
+            'id': asset_id,
+            'title': title,
+            'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
+            'duration': int_or_none(asset.get('seconds')),
+        }
+        videos = asset.get('videos') or []
+        if len(videos) > 1:
+            playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0]
+            if playlist_id:
+                if self._downloader.params.get('noplaylist'):
+                    videos = [videos[int(playlist_id)]]
+                    self.to_screen('Downloading just a single video because of --no-playlist')
+                else:
+                    self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id)
 
-            smil_doc = self._download_xml(
-                smil_url, video_id, note='Downloading SMIL metadata')
-            base_url_el = smil_doc.find('./head/meta')
-            if base_url_el:
-                base_url = base_url_el.attrib['base']
-            formats.extend([{
-                'format_id': 'rmtp',
-                'url': base_url if base_url_el else n.attrib['src'],
-                'play_path': n.attrib['src'],
-                'ext': 'flv',
-                'preference': -100,
-                'format_note': 'Seems to fail at example stream',
-            } for n in smil_doc.findall('./body/video')])
+            def entries():
+                for i, video in enumerate(videos, 1):
+                    video_id = video.get('uuid')
+                    video_url = video.get('url')
+                    if not (video_id and video_url):
+                        continue
+                    formats = self._extract_m3u8_formats(
+                        video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
+                    if not formats:
+                        continue
+                    yield {
+                        'id': video_id,
+                        'formats': formats,
+                        'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
+                        'duration': float_or_none(video.get('duration')),
+                    }
+            info.update({
+                '_type': 'multi_video',
+                'entries': entries(),
+            })
         else:
-            formats.append({'url': smil_url})
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'formats': formats,
-            'title': asset['title'],
-            'thumbnail': asset.get('image'),
-            'description': asset.get('teaser'),
-            'duration': asset.get('duration'),
-            'categories': categories,
-            'view_count': asset.get('views'),
-            'rtmp_live': asset.get('live'),
-            'timestamp': parse_iso8601(asset.get('date')),
-        }
+            formats = self._extract_m3u8_formats(
+                videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4')
+            section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
+            info.update({
+                'formats': formats,
+                'display_id': asset.get('permalink'),
+                'thumbnail': try_get(asset, lambda x: x['images'][0]),
+                'categories': [section_title] if section_title else None,
+                'view_count': int_or_none(asset.get('views')),
+                'is_live': asset.get('is_live') is True,
+                'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
+            })
+        return info
```
