about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2021-02-08 15:46:32 +0700
committer: Sergey M․ <dstftw@gmail.com> 2021-02-08 15:47:12 +0700
commit: 311ebdd9a57e72116136a464fbc0fa8cad32db42 (patch)
tree: 11ed3a0da312e44d1dee7d09593bf5c78e0d1da9
parent: 99c68db0a8adc634e2e928ea2756a2ceee3ae863 (diff)
download: youtube-dl-311ebdd9a57e72116136a464fbc0fa8cad32db42.tar.xz
[xhamster] Extract formats from xplayer settings and extract filesizes (closes #28114)
-rw-r--r-- youtube_dl/extractor/xhamster.py 80
1 files changed, 68 insertions, 12 deletions
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 76aeaf9a4..f73b9778f 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -11,11 +11,14 @@ from ..utils import (
dict_get,
extract_attributes,
ExtractorError,
+ float_or_none,
int_or_none,
parse_duration,
+ str_or_none,
try_get,
unified_strdate,
url_or_none,
+ urljoin,
)
@@ -146,36 +149,89 @@ class XHamsterIE(InfoExtractor):
video = initials['videoModel']
title = video['title']
formats = []
- for format_id, formats_dict in video['sources'].items():
+ format_urls = set()
+ format_sizes = {}
+ sources = try_get(video, lambda x: x['sources'], dict) or {}
+ for format_id, formats_dict in sources.items():
if not isinstance(formats_dict, dict):
continue
+ download_sources = try_get(sources, lambda x: x['download'], dict) or {}
+ for quality, format_dict in download_sources.items():
+ if not isinstance(format_dict, dict):
+ continue
+ format_sizes[quality] = float_or_none(format_dict.get('size'))
for quality, format_item in formats_dict.items():
if format_id == 'download':
# Download link takes some time to be generated,
# skipping for now
continue
- if not isinstance(format_item, dict):
- continue
- format_url = format_item.get('link')
- filesize = int_or_none(
- format_item.get('size'), invscale=1000000)
- else:
- format_url = format_item
- filesize = None
+ format_url = format_item
format_url = url_or_none(format_url)
- if not format_url:
+ if not format_url or format_url in format_urls:
continue
+ format_urls.add(format_url)
formats.append({
'format_id': '%s-%s' % (format_id, quality),
'url': format_url,
'ext': determine_ext(format_url, 'mp4'),
'height': get_height(quality),
- 'filesize': filesize,
+ 'filesize': format_sizes.get(quality),
'http_headers': {
'Referer': urlh.geturl(),
},
})
- self._sort_formats(formats)
+ xplayer_sources = try_get(
+ initials, lambda x: x['xplayerSettings']['sources'], dict)
+ if xplayer_sources:
+ hls_sources = xplayer_sources.get('hls')
+ if isinstance(hls_sources, dict):
+ for hls_format_key in ('url', 'fallback'):
+ hls_url = hls_sources.get(hls_format_key)
+ if not hls_url:
+ continue
+ hls_url = urljoin(url, hls_url)
+ if not hls_url or hls_url in format_urls:
+ continue
+ format_urls.add(hls_url)
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ standard_sources = xplayer_sources.get('standard')
+ if isinstance(standard_sources, dict):
+ for format_id, formats_list in standard_sources.items():
+ if not isinstance(formats_list, list):
+ continue
+ for standard_format in formats_list:
+ if not isinstance(standard_format, dict):
+ continue
+ for standard_format_key in ('url', 'fallback'):
+ standard_url = standard_format.get(standard_format_key)
+ if not standard_url:
+ continue
+ standard_url = urljoin(url, standard_url)
+ if not standard_url or standard_url in format_urls:
+ continue
+ format_urls.add(standard_url)
+ ext = determine_ext(standard_url, 'mp4')
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ standard_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+ quality = (str_or_none(standard_format.get('quality'))
+ or str_or_none(standard_format.get('label'))
+ or '')
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, quality),
+ 'url': standard_url,
+ 'ext': ext,
+ 'height': get_height(quality),
+ 'filesize': format_sizes.get(quality),
+ 'http_headers': {
+ 'Referer': standard_url,
+ },
+ })
+ self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
categories_list = video.get('categories')
if isinstance(categories_list, list):