diff options
author | dirkf <fieldhouse@gmx.net> | 2021-02-24 11:52:30 +0000 |
---|---|---|
committer | Remita Amine <remitamine@gmail.com> | 2021-03-02 10:49:20 +0100 |
commit | e465b25c1fb0e72b97a032220399d4a959662095 (patch) | |
tree | e819c725d7d9d036fb6079c06a6a60a9b54448e4 | |
parent | 7c06216abff092d43e47b584699d435c40a8115e (diff) |
[bbc] add support for BBC Reel videos(closes #21870, closes #23660, closes #28268)
-rw-r--r-- | youtube_dl/extractor/bbc.py | 59 |
1 files changed, 52 insertions, 7 deletions
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index b4daee54e..a0c557929 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -5,10 +5,15 @@ import itertools import re from .common import InfoExtractor +from ..compat import ( + compat_etree_Element, + compat_HTTPError, + compat_urlparse, +) from ..utils import ( + ExtractorError, clean_html, dict_get, - ExtractorError, float_or_none, get_element_by_class, int_or_none, @@ -21,11 +26,6 @@ from ..utils import ( urlencode_postdata, urljoin, ) -from ..compat import ( - compat_etree_Element, - compat_HTTPError, - compat_urlparse, -) class BBCCoUkIE(InfoExtractor): @@ -793,6 +793,20 @@ class BBCIE(BBCCoUkIE): 'description': 'Learn English words and phrases from this story', }, 'add_ie': [BBCCoUkIE.ie_key()], + }, { + # BBC Reel + 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness', + 'info_dict': { + 'id': 'p07c6sb9', + 'ext': 'mp4', + 'title': 'How positive thinking is harming your happiness', + 'alt_title': 'The downsides of positive thinking', + 'description': 'md5:fad74b31da60d83b8265954ee42d85b4', + 'duration': 235, + 'thumbnail': r're:https?://.+/p07c9dsr.jpg', + 'upload_date': '20190604', + 'categories': ['Psychology'], + }, }] @classmethod @@ -980,6 +994,37 @@ class BBCIE(BBCCoUkIE): 'subtitles': subtitles, } + # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness) + initial_data = self._parse_json(self._html_search_regex( + r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)', + webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False) + if initial_data: + init_data = try_get( + initial_data, lambda x: x['initData']['items'][0], dict) or {} + smp_data = init_data.get('smpData') or {} + clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {} + version_id = clip_data.get('versionID') + if version_id: + title = smp_data['title'] + formats, subtitles = self._download_media_selector(version_id) + self._sort_formats(formats) + image_url = smp_data.get('holdingImageURL') + display_date = init_data.get('displayDate') + topic_title = init_data.get('topicTitle') + + return { + 'id': version_id, + 'title': title, + 'formats': formats, + 'alt_title': init_data.get('shortTitle'), + 'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None, + 'description': smp_data.get('summary') or init_data.get('shortSummary'), + 'upload_date': display_date.replace('-', '') if display_date else None, + 'subtitles': subtitles, + 'duration': int_or_none(clip_data.get('duration')), + 'categories': [topic_title] if topic_title else None, + } + # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975) # There are several setPayload calls may be present but the video # seems to be always related to the first one @@ -1041,7 +1086,7 @@ class BBCIE(BBCCoUkIE): thumbnail = None image_url = current_programme.get('image_url') if image_url: - thumbnail = image_url.replace('{recipe}', '1920x1920') + thumbnail = image_url.replace('{recipe}', 'raw') return { 'id': programme_id, 'title': title, |