aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- yt_dlp/downloader/__init__.py 2
-rw-r--r-- yt_dlp/downloader/bunnycdn.py 50
-rw-r--r-- yt_dlp/extractor/_extractors.py 1
-rw-r--r-- yt_dlp/extractor/bunnycdn.py 178
-rw-r--r-- yt_dlp/extractor/sovietscloset.py 45
5 files changed, 246 insertions, 30 deletions
diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py
index 51a9f28f0..1b12bd4be 100644
--- a/yt_dlp/downloader/__init__.py
+++ b/yt_dlp/downloader/__init__.py
@@ -35,6 +35,7 @@ from .rtmp import RtmpFD
from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD
from .youtube_live_chat import YoutubeLiveChatFD
+from .bunnycdn import BunnyCdnFD
PROTOCOL_MAP = {
'rtmp': RtmpFD,
@@ -55,6 +56,7 @@ PROTOCOL_MAP = {
'websocket_frag': WebSocketFragmentFD,
'youtube_live_chat': YoutubeLiveChatFD,
'youtube_live_chat_replay': YoutubeLiveChatFD,
+ 'bunnycdn': BunnyCdnFD,
}
diff --git a/yt_dlp/downloader/bunnycdn.py b/yt_dlp/downloader/bunnycdn.py
new file mode 100644
index 000000000..e787f698a
--- /dev/null
+++ b/yt_dlp/downloader/bunnycdn.py
@@ -0,0 +1,50 @@
+import hashlib
+import random
+import threading
+
+from .common import FileDownloader
+from . import HlsFD
+from ..networking import Request
+from ..networking.exceptions import network_exceptions
+
+
+class BunnyCdnFD(FileDownloader):
+    """
+    Downloads from BunnyCDN while sending the pings it requires
+
+    Note, this is not a part of public API, and will be removed without notice.
+    DO NOT USE
+    """
+
+    def real_download(self, filename, info_dict):
+        """Run the HLS download while a background thread pings the server.
+
+        The ping parameters are read from info_dict['_bunnycdn_ping_data'] and
+        handed to ping_thread as keyword arguments. Returns whatever
+        HlsFD.real_download returns.
+        """
+        self.to_screen(f'[{self.FD_NAME}] Downloading from BunnyCDN')
+
+        hls_fd = HlsFD(self.ydl, self.params)
+
+        stop_event = threading.Event()
+        pinger = threading.Thread(
+            target=self.ping_thread,
+            args=(stop_event,),
+            kwargs=info_dict['_bunnycdn_ping_data'],
+        )
+        pinger.start()
+
+        try:
+            success = hls_fd.real_download(filename, info_dict)
+        finally:
+            # Always stop the pings, whether the download returned or raised
+            stop_event.set()
+        return success
+
+    def ping_thread(self, stop_event, url, headers, secret, context_id):
+        """Request the ping URL at a fixed interval until stop_event is set.
+
+        Each ping carries an MD5 hash built from secret, context_id and the
+        reported time/paused/resolution values. Network errors are printed
+        to the screen and do not stop the loop.
+        """
+        # Site sends ping every 4 seconds, but this throttles the download. Pinging every 2 seconds seems to work.
+        ping_interval = 2
+        # Hard coded resolution as it doesn't seem to matter
+        res = 1080
+        paused = 'false'
+        elapsed = 0
+
+        while True:
+            # wait() doubles as the sleep between pings and returns True once stopped
+            if stop_event.wait(ping_interval):
+                break
+
+            elapsed += ping_interval
+            # Reported time: elapsed seconds plus a random sub-second fraction
+            position = elapsed + round(random.random(), 6)
+            payload = f'{secret}_{context_id}_{position}_{paused}_{res}'
+            digest = hashlib.md5(payload.encode()).hexdigest()
+            ping_url = f'{url}?hash={digest}&time={position}&paused={paused}&resolution={res}'
+
+            try:
+                self.ydl.urlopen(Request(ping_url, headers=headers)).read()
+            except network_exceptions as e:
+                self.to_screen(f'[{self.FD_NAME}] Ping failed: {e}')
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index c937dfe13..9a49bcb30 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -312,6 +312,7 @@ from .brilliantpala import (
)
from .bundesliga import BundesligaIE
from .bundestag import BundestagIE
+from .bunnycdn import BunnyCdnIE
from .businessinsider import BusinessInsiderIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
diff --git a/yt_dlp/extractor/bunnycdn.py b/yt_dlp/extractor/bunnycdn.py
new file mode 100644
index 000000000..d78753384
--- /dev/null
+++ b/yt_dlp/extractor/bunnycdn.py
@@ -0,0 +1,178 @@
+import json
+
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ int_or_none,
+ parse_qs,
+ smuggle_url,
+ unsmuggle_url,
+ url_or_none,
+ urlhandle_detect_ext,
+)
+from ..utils.traversal import find_element, traverse_obj
+
+
+class BunnyCdnIE(InfoExtractor):
+    """Extractor for BunnyCDN Stream player pages (embed and play URLs)."""
+
+    _VALID_URL = r'https?://(?:iframe\.mediadelivery\.net|video\.bunnycdn\.com)/(?:embed|play)/(?P<library_id>\d+)/(?P<id>[\da-f-]+)'
+    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL}[^\'"]*)[\'"]']
+    _TESTS = [{
+        'url': 'https://iframe.mediadelivery.net/embed/113933/e73edec1-e381-4c8b-ae73-717a140e0924',
+        'info_dict': {
+            'id': 'e73edec1-e381-4c8b-ae73-717a140e0924',
+            'ext': 'mp4',
+            'title': 'mistress morgana (3).mp4',
+            'description': '',
+            'timestamp': 1693251673,
+            'thumbnail': r're:^https?://.*\.b-cdn\.net/e73edec1-e381-4c8b-ae73-717a140e0924/thumbnail\.jpg',
+            'duration': 7.0,
+            'upload_date': '20230828',
+        },
+        'params': {'skip_download': True},
+    }, {
+        'url': 'https://iframe.mediadelivery.net/play/136145/32e34c4b-0d72-437c-9abb-05e67657da34',
+        'info_dict': {
+            'id': '32e34c4b-0d72-437c-9abb-05e67657da34',
+            'ext': 'mp4',
+            'timestamp': 1691145748,
+            'thumbnail': r're:^https?://.*\.b-cdn\.net/32e34c4b-0d72-437c-9abb-05e67657da34/thumbnail_9172dc16\.jpg',
+            'duration': 106.0,
+            'description': 'md5:981a3e899a5c78352b21ed8b2f1efd81',
+            'upload_date': '20230804',
+            'title': 'Sanela ist Teil der #arbeitsmarktkraft',
+        },
+        'params': {'skip_download': True},
+    }, {
+        # Stream requires activation and pings
+        'url': 'https://iframe.mediadelivery.net/embed/200867/2e8545ec-509d-4571-b855-4cf0235ccd75',
+        'info_dict': {
+            'id': '2e8545ec-509d-4571-b855-4cf0235ccd75',
+            'ext': 'mp4',
+            'timestamp': 1708497752,
+            'title': 'netflix part 1',
+            'duration': 3959.0,
+            'description': '',
+            'upload_date': '20240221',
+            'thumbnail': r're:^https?://.*\.b-cdn\.net/2e8545ec-509d-4571-b855-4cf0235ccd75/thumbnail\.jpg',
+        },
+        'params': {'skip_download': True},
+    }]
+    _WEBPAGE_TESTS = [{
+        # Stream requires Referer
+        'url': 'https://conword.io/',
+        'info_dict': {
+            'id': '3a5d863e-9cd6-447e-b6ef-e289af50b349',
+            'ext': 'mp4',
+            'title': 'Conword bei der Stadt Köln und Stadt Dortmund',
+            'description': '',
+            'upload_date': '20231031',
+            'duration': 31.0,
+            'thumbnail': 'https://video.watchuh.com/3a5d863e-9cd6-447e-b6ef-e289af50b349/thumbnail.jpg',
+            'timestamp': 1698783879,
+        },
+        'params': {'skip_download': True},
+    }, {
+        # URL requires token and expires
+        'url': 'https://www.stockphotos.com/video/moscow-subway-the-train-is-arriving-at-the-park-kultury-station-10017830',
+        'info_dict': {
+            'id': '0b02fa20-4e8c-4140-8f87-f64d820a3386',
+            'ext': 'mp4',
+            'thumbnail': r're:^https?://.*\.b-cdn\.net/0b02fa20-4e8c-4140-8f87-f64d820a3386/thumbnail\.jpg',
+            'title': 'Moscow subway. The train is arriving at the Park Kultury station.',
+            'upload_date': '20240531',
+            'duration': 18.0,
+            'timestamp': 1717152269,
+            'description': '',
+        },
+        'params': {'skip_download': True},
+    }]
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        """Yield embed URLs found in webpage, smuggling the embedding page URL as Referer."""
+        for embed_url in super()._extract_embed_urls(url, webpage):
+            yield smuggle_url(embed_url, {'Referer': url})
+
+    def _real_extract(self, url):
+        """Extract formats, subtitles and metadata from a BunnyCDN player page.
+
+        Raises ExtractorError (expected) when the player page title is '403'
+        or '404'. For streams that need activation and pings, the returned
+        dict also carries '_bunnycdn_ping_data' and the HLS formats use the
+        'bunnycdn' protocol.
+        """
+        url, smuggled_data = unsmuggle_url(url, {})
+
+        video_id, library_id = self._match_valid_url(url).group('id', 'library_id')
+        webpage = self._download_webpage(
+            f'https://iframe.mediadelivery.net/embed/{library_id}/{video_id}', video_id,
+            headers=traverse_obj(smuggled_data, {'Referer': 'Referer'}),
+            query=traverse_obj(parse_qs(url), {'token': 'token', 'expires': 'expires'}))
+
+        # Bind the title before comparing: `:=` binds looser than `==`, so
+        # `title := f() == '403'` would store a bool and the 404 check could never match
+        html_title = self._html_extract_title(webpage, default=None)
+        if html_title == '403':
+            raise ExtractorError(
+                'This video is inaccessible. Setting a Referer header '
+                'might be required to access the video', expected=True)
+        if html_title == '404':
+            raise ExtractorError('This video does not exist', expected=True)
+
+        headers = {'Referer': url}
+
+        info = traverse_obj(self._parse_html5_media_entries(url, webpage, video_id, _headers=headers), 0) or {}
+        formats = info.get('formats') or []
+        subtitles = info.get('subtitles') or {}
+
+        original_url = self._search_regex(
+            r'(?:var|const|let)\s+originalUrl\s*=\s*["\']([^"\']+)["\']', webpage, 'original url', default=None)
+        if url_or_none(original_url):
+            # Only offer the original file as a format if the server actually serves it
+            urlh = self._request_webpage(
+                HEADRequest(original_url), video_id=video_id, note='Checking original',
+                headers=headers, fatal=False, expected_status=(403, 404))
+            if urlh and urlh.status == 200:
+                formats.append({
+                    'url': original_url,
+                    'format_id': 'source',
+                    'quality': 1,
+                    'http_headers': headers,
+                    'ext': urlhandle_detect_ext(urlh, default='mp4'),
+                    'filesize': int_or_none(urlh.get_header('Content-Length')),
+                })
+
+        # MediaCage Streams require activation and pings
+        src_url = self._search_regex(
+            r'\.setAttribute\([\'"]src[\'"],\s*[\'"]([^\'"]+)[\'"]\)', webpage, 'src url', default=None)
+        activation_url = self._search_regex(
+            r'loadUrl\([\'"]([^\'"]+/activate)[\'"]', webpage, 'activation url', default=None)
+        ping_url = self._search_regex(
+            r'loadUrl\([\'"]([^\'"]+/ping)[\'"]', webpage, 'ping url', default=None)
+        # Parse the query once, and only when a src URL was actually found
+        src_query = parse_qs(src_url) if src_url else {}
+        secret = traverse_obj(src_query, ('secret', 0))
+        context_id = traverse_obj(src_query, ('contextId', 0))
+        ping_data = {}
+        if src_url and activation_url and ping_url and secret and context_id:
+            self._download_webpage(
+                activation_url, video_id, headers=headers, note='Downloading activation data')
+
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                src_url, video_id, 'mp4', headers=headers, m3u8_id='hls', fatal=False)
+            for fmt in fmts:
+                # Route these formats to the downloader registered for the 'bunnycdn' protocol
+                fmt.update({
+                    'protocol': 'bunnycdn',
+                    'http_headers': headers,
+                })
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
+
+            ping_data = {
+                '_bunnycdn_ping_data': {
+                    'url': ping_url,
+                    'headers': headers,
+                    'secret': secret,
+                    'context_id': context_id,
+                },
+            }
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(webpage, ({find_element(id='main-video', html=True)}, {extract_attributes}, {
+                'title': ('data-plyr-config', {json.loads}, 'title', {str}),
+                'thumbnail': ('data-poster', {url_or_none}),
+            })),
+            **ping_data,
+            # JSON-LD is unpacked last, so its fields override the ones above
+            **self._search_json_ld(webpage, video_id, fatal=False),
+        }
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 773ddd344..d35214aa8 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -1,5 +1,6 @@
+from .bunnycdn import BunnyCdnIE
from .common import InfoExtractor
-from ..utils import try_get, unified_timestamp
+from ..utils import make_archive_id, try_get, unified_timestamp
class SovietsClosetBaseIE(InfoExtractor):
@@ -43,7 +44,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
'url': 'https://sovietscloset.com/video/1337',
'md5': 'bd012b04b261725510ca5383074cdd55',
'info_dict': {
- 'id': '1337',
+ 'id': '2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67',
'ext': 'mp4',
'title': 'The Witcher #13',
'thumbnail': r're:^https?://.*\.b-cdn\.net/2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67/thumbnail\.jpg$',
@@ -55,20 +56,23 @@ class SovietsClosetIE(SovietsClosetBaseIE):
'upload_date': '20170413',
'uploader_id': 'SovietWomble',
'uploader_url': 'https://www.twitch.tv/SovietWomble',
- 'duration': 7007,
+ 'duration': 7008,
'was_live': True,
'availability': 'public',
'series': 'The Witcher',
'season': 'Misc',
'episode_number': 13,
'episode': 'Episode 13',
+ 'creators': ['SovietWomble'],
+ 'description': '',
+ '_old_archive_ids': ['sovietscloset 1337'],
},
},
{
'url': 'https://sovietscloset.com/video/1105',
'md5': '89fa928f183893cb65a0b7be846d8a90',
'info_dict': {
- 'id': '1105',
+ 'id': 'c0e5e76f-3a93-40b4-bf01-12343c2eec5d',
'ext': 'mp4',
'title': 'Arma 3 - Zeus Games #5',
'uploader': 'SovietWomble',
@@ -80,39 +84,20 @@ class SovietsClosetIE(SovietsClosetBaseIE):
'upload_date': '20160420',
'uploader_id': 'SovietWomble',
'uploader_url': 'https://www.twitch.tv/SovietWomble',
- 'duration': 8804,
+ 'duration': 8805,
'was_live': True,
'availability': 'public',
'series': 'Arma 3',
'season': 'Zeus Games',
'episode_number': 5,
'episode': 'Episode 5',
+ 'creators': ['SovietWomble'],
+ 'description': '',
+ '_old_archive_ids': ['sovietscloset 1105'],
},
},
]
- def _extract_bunnycdn_iframe(self, video_id, bunnycdn_id):
- iframe = self._download_webpage(
- f'https://iframe.mediadelivery.net/embed/5105/{bunnycdn_id}',
- video_id, note='Downloading BunnyCDN iframe', headers=self.MEDIADELIVERY_REFERER)
-
- m3u8_url = self._search_regex(r'(https?://.*?\.m3u8)', iframe, 'm3u8 url')
- thumbnail_url = self._search_regex(r'(https?://.*?thumbnail\.jpg)', iframe, 'thumbnail url')
-
- m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER)
-
- if not m3u8_formats:
- duration = None
- else:
- duration = self._extract_m3u8_vod_duration(
- m3u8_formats[0]['url'], video_id, headers=self.MEDIADELIVERY_REFERER)
-
- return {
- 'formats': m3u8_formats,
- 'thumbnail': thumbnail_url,
- 'duration': duration,
- }
-
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@@ -122,13 +107,13 @@ class SovietsClosetIE(SovietsClosetBaseIE):
stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream']
- return {
+ return self.url_result(
+ f'https://iframe.mediadelivery.net/embed/5105/{stream["bunnyId"]}', ie=BunnyCdnIE, url_transparent=True,
**self.video_meta(
video_id=video_id, game_name=stream['game']['name'],
category_name=try_get(stream, lambda x: x['subcategory']['name'], str),
episode_number=stream.get('number'), stream_date=stream.get('date')),
- **self._extract_bunnycdn_iframe(video_id, stream['bunnyId']),
- }
+ _old_archive_ids=[make_archive_id(self, video_id)])
class SovietsClosetPlaylistIE(SovietsClosetBaseIE):