author    | Remita Amine <remitamine@gmail.com> | 2020-12-30 13:43:56 +0100
committer | Remita Amine <remitamine@gmail.com> | 2020-12-30 13:43:56 +0100
commit    | bd18824c2a99d6d01b00edfa186b9fd227af255c (patch)
tree      | 72d6a609dfbe4a87cff261ac3fad5f413e2aedd8 /youtube_dl/extractor/yandexdisk.py
parent    | bdd044e67b5d10736aa712e9be64beff0d47f490 (diff)
[yandexdisk] fix extraction (closes #17861) (closes #27131)
Diffstat (limited to 'youtube_dl/extractor/yandexdisk.py')
-rw-r--r-- | youtube_dl/extractor/yandexdisk.py | 144
1 file changed, 84 insertions(+), 60 deletions(-)
```diff
diff --git a/youtube_dl/extractor/yandexdisk.py b/youtube_dl/extractor/yandexdisk.py
index e8f6ae10f..21f37c192 100644
--- a/youtube_dl/extractor/yandexdisk.py
+++ b/youtube_dl/extractor/yandexdisk.py
@@ -1,19 +1,40 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
+
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_HTTPError
 from ..utils import (
     determine_ext,
+    ExtractorError,
     float_or_none,
     int_or_none,
-    try_get,
-    urlencode_postdata,
+    mimetype2ext,
+    parse_iso8601,
+    urljoin,
 )
 
 
 class YandexDiskIE(InfoExtractor):
-    _VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'''(?x)https?://
+        (?:
+            (?:www\.)?yadi\.sk|
+            disk\.yandex\.
+                (?:
+                    az|
+                    by|
+                    co(?:m(?:\.(?:am|ge|tr))?|\.il)|
+                    ee|
+                    fr|
+                    k[gz]|
+                    l[tv]|
+                    md|
+                    t[jm]|
+                    u[az]|
+                    ru
+                )
+        )/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''
 
     _TESTS = [{
         'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
@@ -25,94 +46,97 @@ class YandexDiskIE(InfoExtractor):
             'duration': 168.6,
             'uploader': 'y.botova',
             'uploader_id': '300043621',
+            'timestamp': 1421396809,
+            'upload_date': '20150116',
             'view_count': int,
         },
     }, {
         'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
         'only_matching': True,
+    }, {
+        'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        status = self._download_webpage(
-            'https://disk.yandex.com/auth/status', video_id, query={
-                'urlOrigin': url,
-                'source': 'public',
-                'md5': 'false',
-            })
-
-        sk = self._search_regex(
-            r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2',
-            status, 'sk', group='value')
-
-        webpage = self._download_webpage(url, video_id)
-
-        models = self._parse_json(
-            self._search_regex(
-                r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script',
-                webpage, 'video JSON'),
-            video_id)
-
-        data = next(
-            model['data'] for model in models
-            if model.get('model') == 'resource')
-
-        video_hash = data['id']
-        title = data['name']
-
-        models = self._download_json(
-            'https://disk.yandex.com/models/', video_id,
-            data=urlencode_postdata({
-                '_model.0': 'videoInfo',
-                'id.0': video_hash,
-                '_model.1': 'do-get-resource-url',
-                'id.1': video_hash,
-                'version': '13.6',
-                'sk': sk,
-            }), query={'_m': 'videoInfo'})['models']
-
-        videos = try_get(models, lambda x: x[0]['data']['videos'], list) or []
-        source_url = try_get(
-            models, lambda x: x[1]['data']['file'], compat_str)
+        try:
+            resource = self._download_json(
+                'https://cloud-api.yandex.net/v1/disk/public/resources',
+                video_id, query={'public_key': url})
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                error_description = self._parse_json(
+                    e.cause.read().decode(), video_id)['description']
+                raise ExtractorError(error_description, expected=True)
+            raise
+
+        title = resource['name']
+        public_url = resource.get('public_url')
+        if public_url:
+            video_id = self._match_id(public_url)
+
+        self._set_cookie('yadi.sk', 'yandexuid', '0')
+
+        def call_api(action):
+            return (self._download_json(
+                urljoin(url, '/public/api/') + action, video_id, data=json.dumps({
+                    'hash': url,
+                    # obtain sk if needed from call_api('check-auth') while
+                    # the yandexuid cookie is set and sending an empty JSON object
+                    'sk': 'ya6b52f8c6b12abe91a66d22d3a31084b'
+                }).encode(), headers={
+                    'Content-Type': 'text/plain',
+                }, fatal=False) or {}).get('data') or {}
 
         formats = []
+        source_url = resource.get('file')
+        if not source_url:
+            source_url = call_api('download-url').get('url')
         if source_url:
             formats.append({
                 'url': source_url,
                 'format_id': 'source',
-                'ext': determine_ext(title, 'mp4'),
+                'ext': determine_ext(title, mimetype2ext(resource.get('mime_type')) or 'mp4'),
                 'quality': 1,
+                'filesize': int_or_none(resource.get('size'))
             })
-        for video in videos:
+
+        video_streams = call_api('get-video-streams')
+        for video in (video_streams.get('videos') or []):
             format_url = video.get('url')
             if not format_url:
                 continue
-            if determine_ext(format_url) == 'm3u8':
+            if video.get('dimension') == 'adaptive':
                 formats.extend(self._extract_m3u8_formats(
-                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    format_url, video_id, 'mp4', 'm3u8_native',
                     m3u8_id='hls', fatal=False))
             else:
+                size = video.get('size') or {}
+                height = int_or_none(size.get('height'))
+                format_id = 'hls'
+                if height:
+                    format_id += '-%dp' % height
                 formats.append({
+                    'ext': 'mp4',
+                    'format_id': format_id,
+                    'height': height,
+                    'protocol': 'm3u8_native',
                     'url': format_url,
+                    'width': int_or_none(size.get('width')),
                 })
         self._sort_formats(formats)
 
-        duration = float_or_none(try_get(
-            models, lambda x: x[0]['data']['duration']), 1000)
-        uploader = try_get(
-            data, lambda x: x['user']['display_name'], compat_str)
-        uploader_id = try_get(
-            data, lambda x: x['user']['uid'], compat_str)
-        view_count = int_or_none(try_get(
-            data, lambda x: x['meta']['views_counter']))
+        owner = resource.get('owner') or {}
 
         return {
             'id': video_id,
             'title': title,
-            'duration': duration,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'view_count': view_count,
+            'duration': float_or_none(video_streams.get('duration'), 1000),
+            'uploader': owner.get('display_name'),
+            'uploader_id': owner.get('uid'),
+            'view_count': int_or_none(resource.get('views_count')),
+            'timestamp': parse_iso8601(resource.get('created')),
             'formats': formats,
         }
```
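For context on what the patch switches to: instead of scraping the share page and the `disk.yandex.com/models/` endpoint, the extractor now resolves a share link through the public Yandex Disk REST endpoint `https://cloud-api.yandex.net/v1/disk/public/resources`, and only then uses the session-bound `/public/api/` calls for streams. The snippet below is a minimal standalone sketch of that first request, outside youtube-dl; it assumes only what appears in the diff (the endpoint, the `public_key` query parameter, and the `name`/`mime_type`/`size`/`file` fields), and the helper name `fetch_public_resource` is made up for illustration.

```python
# Standalone sketch (not part of the patch): query the public-resources endpoint
# the same way the patched extractor does. Field names come from the diff above;
# the helper name and CLI handling are illustrative only.
import json
import sys
import urllib.parse
import urllib.request

API = 'https://cloud-api.yandex.net/v1/disk/public/resources'


def fetch_public_resource(share_url):
    # The extractor passes the whole share URL as the 'public_key' parameter.
    query = urllib.parse.urlencode({'public_key': share_url})
    with urllib.request.urlopen('%s?%s' % (API, query)) as resp:
        return json.loads(resp.read().decode('utf-8'))


if __name__ == '__main__':
    url = sys.argv[1] if len(sys.argv) > 1 else 'https://yadi.sk/i/VdOeDou8eZs6Y'
    resource = fetch_public_resource(url)
    print(resource.get('name'), resource.get('mime_type'), resource.get('size'))
    # 'file' holds a direct download URL when the API exposes one; otherwise the
    # extractor falls back to the undocumented /public/api/download-url call.
    print(resource.get('file'))
```

On a 403 the endpoint returns a JSON body whose `description` field is what the patched `_real_extract` surfaces as an `ExtractorError`; the HLS stream variants still come from the `/public/api/get-video-streams` call shown in the diff.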