about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author dirkf <fieldhouse@gmx.net> 2022-11-13 14:59:30 +0000
committer GitHub <noreply@github.com> 2022-11-13 14:59:30 +0000
commit fc2beab0e701c497a003f11fef5c0df54fba1da3 (patch)
tree ac324114fad110f75a1de8645bd0767b8df1765e /youtube_dl/extractor
parent 1a4fbe8462f5e531a891aeac7db6c0bde49c5536 (diff)
download youtube-dl-fc2beab0e701c497a003f11fef5c0df54fba1da3.tar.xz
[generic] Improve KVS (etc) extraction
* detect kt_player('kt_player', 'https://.../kt_player.swf?v=5...
* detect age limit if 18 USC 2257 is mentioned
* test with shooshtime.com

Partially resolves #31332.
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/generic.py 29
1 files changed, 22 insertions, 7 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 597611157..3e8281ed3 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -36,6 +36,7 @@ from ..utils import (
unsmuggle_url,
UnsupportedError,
url_or_none,
+ urljoin,
xpath_attr,
xpath_text,
xpath_with_ns,
@@ -2308,6 +2309,17 @@ class GenericIE(InfoExtractor):
'height': 720,
'age_limit': 18,
},
+ }, {
+ 'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
+ 'md5': 'e2f0a4c329f7986280b7328e24036d60',
+ 'info_dict': {
+ 'id': '284002',
+ 'display_id': 'just-out-of-the-shower-joi',
+ 'ext': 'mp4',
+ 'title': 'Just Out Of The Shower JOI - Shooshtime',
+ 'height': 720,
+ 'age_limit': 18,
+ },
},
]
@@ -2477,7 +2489,7 @@ class GenericIE(InfoExtractor):
format_id = flashvars.get(key + '_text', key)
formats.append(merge_dicts(
parse_resolution(format_id) or parse_resolution(flashvars[key]), {
- 'url': getrealurl(flashvars[key], flashvars['license_code']),
+ 'url': urljoin(url, getrealurl(flashvars[key], flashvars['license_code'])),
'format_id': format_id,
'ext': 'mp4',
'http_headers': {'Referer': url},
@@ -2704,6 +2716,7 @@ class GenericIE(InfoExtractor):
AGE_LIMIT_MARKERS = [
r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
r'>[^<]*you acknowledge you are at least (\d+) years old',
+ r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b',
]
for marker in AGE_LIMIT_MARKERS:
m = re.search(marker, webpage)
@@ -3559,13 +3572,15 @@ class GenericIE(InfoExtractor):
return info_dict
# Look for generic KVS player (before ld+json for tests)
- found = re.search(
- r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)\1[^>]*>',
- webpage)
+ found = self._search_regex(
+ (r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
+ # kt_player('kt_player', 'https://i.shoosh.co/player/kt_player.swf?v=5.5.1', ...
+ r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,',
+ ), webpage, 'KVS player', group='ver', default=False)
if found:
- self.report_extraction('KVS Player')
- if found.group('maj_ver') not in ('4', '5', '6'):
- self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found.group('ver'), ))
+ self.report_extraction('%s: KVS Player' % (video_id, ))
+ if found.split('.')[0] not in ('4', '5', '6'):
+ self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found, ))
return merge_dicts(
self._extract_kvs(url, webpage, video_id),
info_dict)