aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
author Remita Amine <remitamine@gmail.com> 2021-03-13 15:19:24 +0100
committer Remita Amine <remitamine@gmail.com> 2021-03-13 15:19:24 +0100
commit 60845121ca2f49172e7cd941c0cb43363cb86e46 (patch)
tree 6222b712561e33cc229f7c13753249c44b5028b1 /youtube_dl
parent 1182f9567b86f2af747cdb8769ab87649c8ce4c2 (diff)
[sportdeutschland] fix extraction(closes #21856)(closes #28425)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- youtube_dl/extractor/sportdeutschland.py 145
1 files changed, 84 insertions, 61 deletions
diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py
index 378fc7568..3e497a939 100644
--- a/youtube_dl/extractor/sportdeutschland.py
+++ b/youtube_dl/extractor/sportdeutschland.py
@@ -1,82 +1,105 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
from ..utils import (
+ clean_html,
+ float_or_none,
+ int_or_none,
parse_iso8601,
- sanitized_Request,
+ strip_or_none,
+ try_get,
)
class SportDeutschlandIE(InfoExtractor):
- _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
+ _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
_TESTS = [{
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
'info_dict': {
- 'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
+ 'id': '5318cac0275701382770543d7edaf0a0',
'ext': 'mp4',
- 'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
- 'categories': ['Badminton-Deutschland'],
- 'view_count': int,
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
- 'timestamp': int,
- 'upload_date': '20200201',
- 'description': 're:.*', # meaningless description for THIS video
+ 'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
+ 'duration': 16106.36,
},
+ 'params': {
+ 'noplaylist': True,
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
+ 'info_dict': {
+ 'id': 'c6e2fdd01f63013854c47054d2ab776f',
+ 'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
+ 'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
+ 'duration': 31397,
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- sport_id = mobj.group('sport')
-
- api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
- sport_id, video_id)
- req = sanitized_Request(api_url, headers={
- 'Accept': 'application/vnd.vidibus.v2.html+json',
- 'Referer': url,
- })
- data = self._download_json(req, video_id)
-
+ display_id = self._match_id(url)
+ data = self._download_json(
+ 'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
+ display_id, query={'access_token': 'true'})
asset = data['asset']
- categories = [data['section']['title']]
-
- formats = []
- smil_url = asset['video']
- if '.smil' in smil_url:
- m3u8_url = smil_url.replace('.smil', '.m3u8')
- formats.extend(
- self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
+ title = (asset.get('title') or asset['label']).strip()
+ asset_id = asset.get('id') or asset.get('uuid')
+ info = {
+ 'id': asset_id,
+ 'title': title,
+ 'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
+ 'duration': int_or_none(asset.get('seconds')),
+ }
+ videos = asset.get('videos') or []
+ if len(videos) > 1:
+ playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0]
+ if playlist_id:
+ if self._downloader.params.get('noplaylist'):
+ videos = [videos[int(playlist_id)]]
+ self.to_screen('Downloading just a single video because of --no-playlist')
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id)
- smil_doc = self._download_xml(
- smil_url, video_id, note='Downloading SMIL metadata')
- base_url_el = smil_doc.find('./head/meta')
- if base_url_el:
- base_url = base_url_el.attrib['base']
- formats.extend([{
- 'format_id': 'rmtp',
- 'url': base_url if base_url_el else n.attrib['src'],
- 'play_path': n.attrib['src'],
- 'ext': 'flv',
- 'preference': -100,
- 'format_note': 'Seems to fail at example stream',
- } for n in smil_doc.findall('./body/video')])
+ def entries():
+ for i, video in enumerate(videos, 1):
+ video_id = video.get('uuid')
+ video_url = video.get('url')
+ if not (video_id and video_url):
+ continue
+ formats = self._extract_m3u8_formats(
+ video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
+ if not formats:
+ continue
+ yield {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
+ 'duration': float_or_none(video.get('duration')),
+ }
+ info.update({
+ '_type': 'multi_video',
+ 'entries': entries(),
+ })
else:
- formats.append({'url': smil_url})
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': asset['title'],
- 'thumbnail': asset.get('image'),
- 'description': asset.get('teaser'),
- 'duration': asset.get('duration'),
- 'categories': categories,
- 'view_count': asset.get('views'),
- 'rtmp_live': asset.get('live'),
- 'timestamp': parse_iso8601(asset.get('date')),
- }
+ formats = self._extract_m3u8_formats(
+ videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4')
+ section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
+ info.update({
+ 'formats': formats,
+ 'display_id': asset.get('permalink'),
+ 'thumbnail': try_get(asset, lambda x: x['images'][0]),
+ 'categories': [section_title] if section_title else None,
+ 'view_count': int_or_none(asset.get('views')),
+ 'is_live': asset.get('is_live') is True,
+ 'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
+ })
+ return info