[videa] Adapt to updates (#26301)

closes #25973, closes #25650.
author: Adrian Heine né Lang <mail@adrianheine.de> 2020-11-26 12:55:06 +0100
committer: GitHub <noreply@github.com> 2020-11-26 11:55:06 +0000
commit: a3cf22e590f7057c50563f36c17b59993446dbdd (patch)
tree: 097e3c75cb290736da0946ab09a1a45c843082dc /youtube_dl/extractor
parent: 99de2f38d375f794ffdc4f5a110a5227310f3ab0 (diff)
1 files changed, 60 insertions, 2 deletions
diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py
index d0e34c819..a03614cc1 100644
--- a/youtube_dl/extractor/videa.py
+++ b/youtube_dl/extractor/videa.py
@@ -2,15 +2,24 @@
 from __future__ import unicode_literals
 
 import re
+import random
+import string
+import struct
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     int_or_none,
     mimetype2ext,
     parse_codecs,
     xpath_element,
     xpath_text,
 )
+from ..compat import (
+    compat_b64decode,
+    compat_ord,
+    compat_parse_qs,
+)
 
 
 class VideaIE(InfoExtractor):
@@ -60,15 +69,63 @@ class VideaIE(InfoExtractor):
             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
             webpage)]
 
+    def rc4(self, ciphertext, key):
+        """Apply RC4 with text `key` to bytes-like `ciphertext`; return bytes.
+
+        RC4 is symmetric, so the same call both encrypts and decrypts.
+        """
+        key_len = len(key)
+        S = list(range(256))
+        j = 0
+        for i in range(256):  # key scheduling: permute S according to the key
+            j = (j + S[i] + ord(key[i % key_len])) % 256
+            S[i], S[j] = S[j], S[i]
+        # bytearray yields ints on Python 2 and 3 and appends in O(1); it also
+        # avoids struct.pack with a unicode format (unicode_literals is on).
+        res = bytearray()
+        i = j = 0
+        for byte in bytearray(ciphertext):
+            i = (i + 1) % 256
+            j = (j + S[i]) % 256
+            S[i], S[j] = S[j], S[i]
+            res.append(byte ^ S[(S[i] + S[j]) % 256])
+        return bytes(res)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id, fatal=True)
+        error = self._search_regex(r'<p class="error-text">([^<]+)</p>', webpage, 'error', default=None)
+        if error:
+            raise ExtractorError(error, expected=True)
+
+        video_src_params_raw = self._search_regex(r'<iframe[^>]+id="videa_player_iframe"[^>]+src="/player\?([^"]+)"', webpage, 'video_src_params')
+        video_src_params = compat_parse_qs(video_src_params_raw)
+        player_page = self._download_webpage("https://videa.hu/videojs_player?%s" % video_src_params_raw, video_id, fatal=True)
+        nonce = self._search_regex(r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
+        random_seed = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(8))
+        static_secret = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
+        l = nonce[:32]
+        s = nonce[32:]
+        result = ''
+        for i in range(0, 32):
+            result += s[i - (static_secret.index(l[i]) - 31)]
 
-        info = self._download_xml(
+        video_src_params['_s'] = random_seed
+        video_src_params['_t'] = result[:16]
+        encryption_key_stem = result[16:] + random_seed
+
+        [b64_info, handle] = self._download_webpage_handle(
             'http://videa.hu/videaplayer_get_xml.php', video_id,
-            query={'v': video_id})
+            query=video_src_params, fatal=True)
+
+        encrypted_info = compat_b64decode(b64_info)
+        key = encryption_key_stem + handle.info()['x-videa-xs']
+        info_str = self.rc4(encrypted_info, key).decode('utf8')
+        info = self._parse_xml(info_str, video_id)
 
         video = xpath_element(info, './/video', 'video', fatal=True)
         sources = xpath_element(info, './/video_sources', 'sources', fatal=True)
+        hash_values = xpath_element(info, './/hash_values', 'hash_values', fatal=True)
 
         title = xpath_text(video, './title', fatal=True)
 
@@ -77,6 +134,7 @@ class VideaIE(InfoExtractor):
             source_url = source.text
             if not source_url:
                 continue
+            source_url += '?md5=%s&expires=%s' % (hash_values.find('hash_value_%s' % source.get('name')).text, source.get('exp'))
             f = parse_codecs(source.get('codecs'))
             f.update({
                 'url': source_url,
author	Adrian Heine né Lang <mail@adrianheine.de>	2020-11-26 12:55:06 +0100
committer	GitHub <noreply@github.com>	2020-11-26 11:55:06 +0000
commit	a3cf22e590f7057c50563f36c17b59993446dbdd (patch)
tree	097e3c75cb290736da0946ab09a1a45c843082dc /youtube_dl/extractor
parent	99de2f38d375f794ffdc4f5a110a5227310f3ab0 (diff)