diff options
author | Remita Amine <remitamine@gmail.com> | 2020-11-26 12:56:49 +0100 |
---|---|---|
committer | Remita Amine <remitamine@gmail.com> | 2020-11-26 12:56:49 +0100 |
commit | f9f9699f2fa87ec54d59b1834cc56adb6147fec6 (patch) | |
tree | 8f130f8ff829b0c0e743735fcf6c9da0bf54abfa /youtube_dl/extractor | |
parent | a3cf22e590f7057c50563f36c17b59993446dbdd (diff) |
[videa] improve extraction
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/videa.py | 99 |
1 files changed, 54 insertions, 45 deletions
diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py index a03614cc1..ab2c15cde 100644 --- a/youtube_dl/extractor/videa.py +++ b/youtube_dl/extractor/videa.py @@ -1,10 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re import random +import re import string -import struct from .common import InfoExtractor from ..utils import ( @@ -12,13 +11,14 @@ from ..utils import ( int_or_none, mimetype2ext, parse_codecs, + update_url_query, xpath_element, xpath_text, ) from ..compat import ( compat_b64decode, compat_ord, - compat_parse_qs, + compat_struct_pack, ) @@ -28,7 +28,7 @@ class VideaIE(InfoExtractor): videa(?:kid)?\.hu/ (?: videok/(?:[^/]+/)*[^?#&]+-| - player\?.*?\bv=| + (?:videojs_)?player\?.*?\bv=| player/v/ ) (?P<id>[^?#&]+) @@ -62,6 +62,7 @@ class VideaIE(InfoExtractor): 'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1', 'only_matching': True, }] + _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p' @staticmethod def _extract_urls(webpage): @@ -69,75 +70,84 @@ class VideaIE(InfoExtractor): r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1', webpage)] - def rc4(self, ciphertext, key): + @staticmethod + def rc4(cipher_text, key): res = b'' - keyLen = len(key) + key_len = len(key) S = list(range(256)) j = 0 for i in range(256): - j = (j + S[i] + ord(key[i % keyLen])) % 256 + j = (j + S[i] + ord(key[i % key_len])) % 256 S[i], S[j] = S[j], S[i] i = 0 j = 0 - for m in range(len(ciphertext)): + for m in range(len(cipher_text)): i = (i + 1) % 256 j = (j + S[i]) % 256 S[i], S[j] = S[j], S[i] k = S[(S[i] + S[j]) % 256] - res += struct.pack("B", k ^ compat_ord(ciphertext[m])) + res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m])) - return res + return res.decode() def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, fatal=True) - error = self._search_regex(r'<p class="error-text">([^<]+)</p>', webpage, 'error', default=None) - if error: - raise ExtractorError(error, expected=True) - - video_src_params_raw = self._search_regex(r'<iframe[^>]+id="videa_player_iframe"[^>]+src="/player\?([^"]+)"', webpage, 'video_src_params') - video_src_params = compat_parse_qs(video_src_params_raw) - player_page = self._download_webpage("https://videa.hu/videojs_player?%s" % video_src_params_raw, video_id, fatal=True) - nonce = self._search_regex(r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce') - random_seed = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(8)) - static_secret = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p' + query = {'v': video_id} + player_page = self._download_webpage( + 'https://videa.hu/player', video_id, query=query) + + nonce = self._search_regex( + r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce') l = nonce[:32] s = nonce[32:] result = '' for i in range(0, 32): - result += s[i - (static_secret.index(l[i]) - 31)] - - video_src_params['_s'] = random_seed - video_src_params['_t'] = result[:16] - encryption_key_stem = result[16:] + random_seed - - [b64_info, handle] = self._download_webpage_handle( - 'http://videa.hu/videaplayer_get_xml.php', video_id, - query=video_src_params, fatal=True) - - encrypted_info = compat_b64decode(b64_info) - key = encryption_key_stem + handle.info()['x-videa-xs'] - info_str = self.rc4(encrypted_info, key).decode('utf8') - info = self._parse_xml(info_str, video_id) - - video = xpath_element(info, './/video', 'video', fatal=True) - sources = xpath_element(info, './/video_sources', 'sources', fatal=True) - hash_values = xpath_element(info, './/hash_values', 'hash_values', fatal=True) + result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)] + + random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) + query['_s'] = random_seed + query['_t'] = result[:16] + + b64_info, handle = self._download_webpage_handle( + 'http://videa.hu/videaplayer_get_xml.php', video_id, query=query) + if b64_info.startswith('<?xml'): + info = self._parse_xml(b64_info, video_id) + else: + key = result[16:] + random_seed + handle.headers['x-videa-xs'] + info = self._parse_xml(self.rc4( + compat_b64decode(b64_info), key), video_id) + + video = xpath_element(info, './video', 'video') + if not video: + raise ExtractorError(xpath_element( + info, './error', fatal=True), expected=True) + sources = xpath_element( + info, './video_sources', 'sources', fatal=True) + hash_values = xpath_element( + info, './hash_values', 'hash values', fatal=True) title = xpath_text(video, './title', fatal=True) formats = [] for source in sources.findall('./video_source'): source_url = source.text - if not source_url: + source_name = source.get('name') + source_exp = source.get('exp') + if not (source_url and source_name and source_exp): continue - source_url += '?md5=%s&expires=%s' % (hash_values.find('hash_value_%s' % source.get('name')).text, source.get('exp')) + hash_value = xpath_text(hash_values, 'hash_value_' + source_name) + if not hash_value: + continue + source_url = update_url_query(source_url, { + 'md5': hash_value, + 'expires': source_exp, + }) f = parse_codecs(source.get('codecs')) f.update({ - 'url': source_url, + 'url': self._proto_relative_url(source_url), 'ext': mimetype2ext(source.get('mimetype')) or 'mp4', 'format_id': source.get('name'), 'width': int_or_none(source.get('width')), @@ -146,8 +156,7 @@ class VideaIE(InfoExtractor): formats.append(f) self._sort_formats(formats) - thumbnail = xpath_text(video, './poster_src') - duration = int_or_none(xpath_text(video, './duration')) + thumbnail = self._proto_relative_url(xpath_text(video, './poster_src')) age_limit = None is_adult = xpath_text(video, './is_adult_content', default=None) @@ -158,7 +167,7 @@ class VideaIE(InfoExtractor): 'id': video_id, 'title': title, 'thumbnail': thumbnail, - 'duration': duration, + 'duration': int_or_none(xpath_text(video, './duration')), 'age_limit': age_limit, 'formats': formats, } |