aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author bashonly <bashonly@bashonly.com> 2023-08-16 18:42:48 -0500
committer Simon Sawicki <contact@grub4k.xyz> 2023-11-14 22:04:25 +0100
commit f04b5bedad7b281bee9814686bba1762bae092eb (patch)
tree c18255b7b8d917a21ef1d4aa1fd66b62806fed8c
parent d4f14a72dc1dd79396e0e80980268aee902b61e4 (diff)
[ie] Do not smuggle `http_headers`
See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x

Authored by: coletdjnz
-rw-r--r-- test/test_networking.py 4
-rw-r--r-- yt_dlp/extractor/cybrary.py 2
-rw-r--r-- yt_dlp/extractor/duboku.py 2
-rw-r--r-- yt_dlp/extractor/embedly.py 2
-rw-r--r-- yt_dlp/extractor/generic.py 11
-rw-r--r-- yt_dlp/extractor/slideslive.py 2
-rw-r--r-- yt_dlp/extractor/storyfire.py 4
-rw-r--r-- yt_dlp/extractor/vimeo.py 6
-rw-r--r-- yt_dlp/utils/networking.py 1
9 files changed, 19 insertions, 15 deletions
diff --git a/test/test_networking.py b/test/test_networking.py
index 689161fb2..4466fc048 100644
--- a/test/test_networking.py
+++ b/test/test_networking.py
@@ -1293,6 +1293,10 @@ class TestYoutubeDLNetworking:
assert 'Youtubedl-no-compression' not in rh.headers
assert rh.headers.get('Accept-Encoding') == 'identity'
+ with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
+ rh = self.build_handler(ydl)
+ assert 'Ytdl-socks-proxy' not in rh.headers
+
def test_build_handler_params(self):
with FakeYDL({
'http_headers': {'test': 'testtest'},
diff --git a/yt_dlp/extractor/cybrary.py b/yt_dlp/extractor/cybrary.py
index aeffe93b4..c4c78ee1b 100644
--- a/yt_dlp/extractor/cybrary.py
+++ b/yt_dlp/extractor/cybrary.py
@@ -105,7 +105,7 @@ class CybraryIE(CybraryBaseIE):
'chapter': module.get('title'),
'chapter_id': str_or_none(module.get('id')),
'title': activity.get('title'),
- 'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'http_headers': {'Referer': 'https://api.cybrary.it'}})
+ 'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'referer': 'https://api.cybrary.it'})
}
diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py
index fb0546cae..fc9564cef 100644
--- a/yt_dlp/extractor/duboku.py
+++ b/yt_dlp/extractor/duboku.py
@@ -138,7 +138,7 @@ class DubokuIE(InfoExtractor):
# of the video.
return {
'_type': 'url_transparent',
- 'url': smuggle_url(data_url, {'http_headers': headers}),
+ 'url': smuggle_url(data_url, {'referer': webpage_url}),
'id': video_id,
'title': title,
'series': series_title,
diff --git a/yt_dlp/extractor/embedly.py b/yt_dlp/extractor/embedly.py
index 458aaa0a0..a424b49df 100644
--- a/yt_dlp/extractor/embedly.py
+++ b/yt_dlp/extractor/embedly.py
@@ -106,4 +106,4 @@ class EmbedlyIE(InfoExtractor):
return self.url_result(src, YoutubeTabIE)
return self.url_result(smuggle_url(
urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))),
- {'http_headers': {'Referer': url}}))
+ {'referer': url}))
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index ac7cc673f..1503e5146 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -17,6 +17,7 @@ from ..utils import (
determine_protocol,
dict_get,
extract_basic_auth,
+ filter_dict,
format_field,
int_or_none,
is_html,
@@ -2435,10 +2436,10 @@ class GenericIE(InfoExtractor):
# to accept raw bytes and being able to download only a chunk.
# It may probably better to solve this by checking Content-Type for application/octet-stream
# after a HEAD request, but not sure if we can rely on this.
- full_response = self._request_webpage(url, video_id, headers={
+ full_response = self._request_webpage(url, video_id, headers=filter_dict({
'Accept-Encoding': 'identity',
- **smuggled_data.get('http_headers', {})
- })
+ 'Referer': smuggled_data.get('referer'),
+ }))
new_url = full_response.url
url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl()
if new_url != extract_basic_auth(url)[0]:
@@ -2458,7 +2459,7 @@ class GenericIE(InfoExtractor):
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
self.report_detected('direct video link')
- headers = smuggled_data.get('http_headers', {})
+ headers = filter_dict({'Referer': smuggled_data.get('referer')})
format_id = str(m.group('format_id'))
ext = determine_ext(url, default_ext=None) or urlhandle_detect_ext(full_response)
subtitles = {}
@@ -2710,7 +2711,7 @@ class GenericIE(InfoExtractor):
'url': smuggle_url(json_ld['url'], {
'force_videoid': video_id,
'to_generic': True,
- 'http_headers': {'Referer': url},
+ 'referer': url,
}),
}, json_ld)]
diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py
index 13f3109d7..df2af3b35 100644
--- a/yt_dlp/extractor/slideslive.py
+++ b/yt_dlp/extractor/slideslive.py
@@ -530,7 +530,7 @@ class SlidesLiveIE(InfoExtractor):
if service_name == 'vimeo':
info['url'] = smuggle_url(
f'https://player.vimeo.com/video/{service_id}',
- {'http_headers': {'Referer': url}})
+ {'referer': url})
video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
if not video_slides:
diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py
index 035747c31..566f77782 100644
--- a/yt_dlp/extractor/storyfire.py
+++ b/yt_dlp/extractor/storyfire.py
@@ -32,9 +32,7 @@ class StoryFireBaseIE(InfoExtractor):
'description': video.get('description'),
'url': smuggle_url(
'https://player.vimeo.com/video/' + vimeo_id, {
- 'http_headers': {
- 'Referer': 'https://storyfire.com/',
- }
+ 'referer': 'https://storyfire.com/',
}),
'thumbnail': video.get('storyImage'),
'view_count': int_or_none(video.get('views')),
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index e72fa50fa..e5e8144bb 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -37,14 +37,14 @@ class VimeoBaseInfoExtractor(InfoExtractor):
@staticmethod
def _smuggle_referrer(url, referrer_url):
- return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
+ return smuggle_url(url, {'referer': referrer_url})
def _unsmuggle_headers(self, url):
"""@returns (url, smuggled_data, headers)"""
url, data = unsmuggle_url(url, {})
headers = self.get_param('http_headers').copy()
- if 'http_headers' in data:
- headers.update(data['http_headers'])
+ if 'referer' in data:
+ headers['Referer'] = data['referer']
return url, data, headers
def _perform_login(self, username, password):
diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py
index ba0493cc2..ed0250011 100644
--- a/yt_dlp/utils/networking.py
+++ b/yt_dlp/utils/networking.py
@@ -123,6 +123,7 @@ def clean_headers(headers: HTTPHeaderDict):
if 'Youtubedl-No-Compression' in headers: # compat
del headers['Youtubedl-No-Compression']
headers['Accept-Encoding'] = 'identity'
+ headers.pop('Ytdl-socks-proxy', None)
def remove_dot_segments(path):