aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author dirkf <fieldhouse@gmx.net> 2021-02-24 11:52:30 +0000
committer Remita Amine <remitamine@gmail.com> 2021-03-02 10:49:20 +0100
commit e465b25c1fb0e72b97a032220399d4a959662095 (patch)
tree e819c725d7d9d036fb6079c06a6a60a9b54448e4
parent 7c06216abff092d43e47b584699d435c40a8115e (diff)
[bbc] add support for BBC Reel videos (closes #21870, closes #23660, closes #28268)
-rw-r--r-- youtube_dl/extractor/bbc.py 59
1 files changed, 52 insertions, 7 deletions
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index b4daee54e..a0c557929 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -5,10 +5,15 @@ import itertools
import re
from .common import InfoExtractor
+from ..compat import (
+ compat_etree_Element,
+ compat_HTTPError,
+ compat_urlparse,
+)
from ..utils import (
+ ExtractorError,
clean_html,
dict_get,
- ExtractorError,
float_or_none,
get_element_by_class,
int_or_none,
@@ -21,11 +26,6 @@ from ..utils import (
urlencode_postdata,
urljoin,
)
-from ..compat import (
- compat_etree_Element,
- compat_HTTPError,
- compat_urlparse,
-)
class BBCCoUkIE(InfoExtractor):
@@ -793,6 +793,20 @@ class BBCIE(BBCCoUkIE):
'description': 'Learn English words and phrases from this story',
},
'add_ie': [BBCCoUkIE.ie_key()],
+ }, {
+ # BBC Reel
+ 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
+ 'info_dict': {
+ 'id': 'p07c6sb9',
+ 'ext': 'mp4',
+ 'title': 'How positive thinking is harming your happiness',
+ 'alt_title': 'The downsides of positive thinking',
+ 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
+ 'duration': 235,
+ 'thumbnail': r're:https?://.+/p07c9dsr.jpg',
+ 'upload_date': '20190604',
+ 'categories': ['Psychology'],
+ },
}]
@classmethod
@@ -980,6 +994,37 @@ class BBCIE(BBCCoUkIE):
'subtitles': subtitles,
}
+ # BBC Reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
+ initial_data = self._parse_json(self._html_search_regex(
+ r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
+ webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
+ if initial_data:
+ init_data = try_get(
+ initial_data, lambda x: x['initData']['items'][0], dict) or {}
+ smp_data = init_data.get('smpData') or {}
+ clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
+ version_id = clip_data.get('versionID')
+ if version_id:
+ title = smp_data['title']
+ formats, subtitles = self._download_media_selector(version_id)
+ self._sort_formats(formats)
+ image_url = smp_data.get('holdingImageURL')
+ display_date = init_data.get('displayDate')
+ topic_title = init_data.get('topicTitle')
+
+ return {
+ 'id': version_id,
+ 'title': title,
+ 'formats': formats,
+ 'alt_title': init_data.get('shortTitle'),
+ 'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
+ 'description': smp_data.get('summary') or init_data.get('shortSummary'),
+ 'upload_date': display_date.replace('-', '') if display_date else None,
+ 'subtitles': subtitles,
+ 'duration': int_or_none(clip_data.get('duration')),
+ 'categories': [topic_title] if topic_title else None,
+ }
+
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
# Several setPayload calls may be present, but the video
# always seems to be related to the first one
@@ -1041,7 +1086,7 @@ class BBCIE(BBCCoUkIE):
thumbnail = None
image_url = current_programme.get('image_url')
if image_url:
- thumbnail = image_url.replace('{recipe}', '1920x1920')
+ thumbnail = image_url.replace('{recipe}', 'raw')
return {
'id': programme_id,
'title': title,