about summary refs log tree commit diff
path: root/yt_dlp/extractor/generic.py
diff options
context:
space:
mode:
author bashonly <88596187+bashonly@users.noreply.github.com> 2024-10-24 23:11:48 +0000
committer GitHub <noreply@github.com> 2024-10-24 23:11:48 +0000
commit c29f5a7fae93a08f3cfbb6127b2faa75145b06a0 (patch)
tree 832e9dc27befb7fae292a762c2beacbcc30adb7b /yt_dlp/extractor/generic.py
parent ec9b25043f399de6a591d8370d32bf0e66c117f2 (diff)
[ie/generic] Do not impersonate by default (fix edfd095b1917701c5046bd51f9542897c17d41a7) (#11336)
Closes #11335 Authored by: bashonly
Diffstat (limited to 'yt_dlp/extractor/generic.py')
-rw-r--r-- yt_dlp/extractor/generic.py 36
1 files changed, 28 insertions, 8 deletions
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 9b5421e41..320a47772 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -8,6 +8,8 @@ from .common import InfoExtractor
from .commonprotocols import RtmpIE
from .youtube import YoutubeIE
from ..compat import compat_etree_fromstring
+from ..cookies import LenientSimpleCookie
+from ..networking.exceptions import HTTPError
from ..networking.impersonate import ImpersonateTarget
from ..utils import (
KNOWN_EXTENSIONS,
@@ -2374,10 +2376,9 @@ class GenericIE(InfoExtractor):
else:
video_id = self._generic_id(url)
- # Try to impersonate a web-browser by default if possible
- # Skip impersonation if not available to omit the warning
- impersonate = self._configuration_arg('impersonate', [''])
- if 'false' in impersonate or not self._downloader._impersonate_target_available(ImpersonateTarget()):
+ # Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335
+ impersonate = self._configuration_arg('impersonate', ['false'])
+ if 'false' in impersonate:
impersonate = None
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
@@ -2388,10 +2389,29 @@ class GenericIE(InfoExtractor):
# to accept raw bytes and being able to download only a chunk.
# It may probably be better to solve this by checking Content-Type for application/octet-stream
# after a HEAD request, but not sure if we can rely on this.
- full_response = self._request_webpage(url, video_id, headers=filter_dict({
- 'Accept-Encoding': 'identity',
- 'Referer': smuggled_data.get('referer'),
- }), impersonate=impersonate)
+ try:
+ full_response = self._request_webpage(url, video_id, headers=filter_dict({
+ 'Accept-Encoding': 'identity',
+ 'Referer': smuggled_data.get('referer'),
+ }), impersonate=impersonate)
+ except ExtractorError as e:
+ if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
+ and e.cause.response.get_header('cf-mitigated') == 'challenge'
+ and e.cause.response.extensions.get('impersonate') is None):
+ raise
+ cf_cookie_domain = traverse_obj(
+ LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
+ ('__cf_bm', 'domain'))
+ if cf_cookie_domain:
+ self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
+ self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
+ msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
+ if not self._downloader._impersonate_target_available(ImpersonateTarget()):
+ msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
+ 'how to install the required impersonation dependency, and ')
+ raise ExtractorError(
+ f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
+
new_url = full_response.url
if new_url != extract_basic_auth(url)[0]:
self.report_following_redirect(new_url)