aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Simon Sawicki <contact@grub4k.xyz> 2024-10-13 03:42:43 +0200
committer GitHub <noreply@github.com> 2024-10-13 03:42:43 +0200
commit edfd095b1917701c5046bd51f9542897c17d41a7 (patch)
tree 7748ab949fd5ad14f5160960092e1cff7ae2788f
parent c5f0f58efd8c3930de8202c15a5c53b1b635bd51 (diff)
[ie/generic] Impersonate browser by default (#11206)
Also adds `impersonate` extractor arg
Authored by: Grub4K
-rw-r--r-- README.md 1
-rw-r--r-- yt_dlp/extractor/generic.py 9
2 files changed, 9 insertions, 1 deletions
diff --git a/README.md b/README.md
index fbf50072d..4b1ada82e 100644
--- a/README.md
+++ b/README.md
@@ -1795,6 +1795,7 @@ The following extractors use this feature:
* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
+* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `safari,chrome-110`. By default any available target will be used. Use `false` to disable impersonation
#### funimation
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 592800287..9b5421e41 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -8,6 +8,7 @@ from .common import InfoExtractor
from .commonprotocols import RtmpIE
from .youtube import YoutubeIE
from ..compat import compat_etree_fromstring
+from ..networking.impersonate import ImpersonateTarget
from ..utils import (
KNOWN_EXTENSIONS,
MEDIA_EXTENSIONS,
@@ -2373,6 +2374,12 @@ class GenericIE(InfoExtractor):
else:
video_id = self._generic_id(url)
+ # Try to impersonate a web-browser by default if possible
+ # Skip impersonation if not available to omit the warning
+ impersonate = self._configuration_arg('impersonate', [''])
+ if 'false' in impersonate or not self._downloader._impersonate_target_available(ImpersonateTarget()):
+ impersonate = None
+
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
# making it impossible to download only chunk of the file (yet we need only 512kB to
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
@@ -2384,7 +2391,7 @@ class GenericIE(InfoExtractor):
full_response = self._request_webpage(url, video_id, headers=filter_dict({
'Accept-Encoding': 'identity',
'Referer': smuggled_data.get('referer'),
- }))
+ }), impersonate=impersonate)
new_url = full_response.url
if new_url != extract_basic_auth(url)[0]:
self.report_following_redirect(new_url)