diff options
Diffstat (limited to 'youtube_dl/extractor/videa.py')
| -rw-r--r-- | youtube_dl/extractor/videa.py | 99 | 
1 files changed, 54 insertions, 45 deletions
diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py
index a03614cc1..ab2c15cde 100644
--- a/youtube_dl/extractor/videa.py
+++ b/youtube_dl/extractor/videa.py
@@ -1,10 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
 import random
+import re
 import string
-import struct
 
 from .common import InfoExtractor
 from ..utils import (
@@ -12,13 +11,14 @@ from ..utils import (
     int_or_none,
     mimetype2ext,
     parse_codecs,
+    update_url_query,
     xpath_element,
     xpath_text,
 )
 from ..compat import (
     compat_b64decode,
     compat_ord,
-    compat_parse_qs,
+    compat_struct_pack,
 )
 
 
@@ -28,7 +28,7 @@ class VideaIE(InfoExtractor):
                         videa(?:kid)?\.hu/
                         (?:
                             videok/(?:[^/]+/)*[^?#&]+-|
-                            player\?.*?\bv=|
+                            (?:videojs_)?player\?.*?\bv=|
                             player/v/
                         )
                         (?P<id>[^?#&]+)
@@ -62,6 +62,7 @@ class VideaIE(InfoExtractor):
         'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
         'only_matching': True,
     }]
+    _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
 
     @staticmethod
     def _extract_urls(webpage):
@@ -69,75 +70,84 @@ class VideaIE(InfoExtractor):
             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
             webpage)]
 
-    def rc4(self, ciphertext, key):
+    @staticmethod
+    def rc4(cipher_text, key):
         res = b''
 
-        keyLen = len(key)
+        key_len = len(key)
         S = list(range(256))
 
         j = 0
         for i in range(256):
-            j = (j + S[i] + ord(key[i % keyLen])) % 256
+            j = (j + S[i] + ord(key[i % key_len])) % 256
             S[i], S[j] = S[j], S[i]
 
         i = 0
         j = 0
-        for m in range(len(ciphertext)):
+        for m in range(len(cipher_text)):
             i = (i + 1) % 256
             j = (j + S[i]) % 256
             S[i], S[j] = S[j], S[i]
             k = S[(S[i] + S[j]) % 256]
-            res += struct.pack("B", k ^ compat_ord(ciphertext[m]))
+            res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
 
-        return res
+        return res.decode()
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id, fatal=True)
-        error = self._search_regex(r'<p class="error-text">([^<]+)</p>', webpage, 'error', default=None)
-        if error:
-            raise ExtractorError(error, expected=True)
-
-        video_src_params_raw = self._search_regex(r'<iframe[^>]+id="videa_player_iframe"[^>]+src="/player\?([^"]+)"', webpage, 'video_src_params')
-        video_src_params = compat_parse_qs(video_src_params_raw)
-        player_page = self._download_webpage("https://videa.hu/videojs_player?%s" % video_src_params_raw, video_id, fatal=True)
-        nonce = self._search_regex(r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
-        random_seed = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(8))
-        static_secret = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
+        query = {'v': video_id}
+        player_page = self._download_webpage(
+            'https://videa.hu/player', video_id, query=query)
+
+        nonce = self._search_regex(
+            r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
         l = nonce[:32]
         s = nonce[32:]
         result = ''
         for i in range(0, 32):
-            result += s[i - (static_secret.index(l[i]) - 31)]
-
-        video_src_params['_s'] = random_seed
-        video_src_params['_t'] = result[:16]
-        encryption_key_stem = result[16:] + random_seed
-
-        [b64_info, handle] = self._download_webpage_handle(
-            'http://videa.hu/videaplayer_get_xml.php', video_id,
-            query=video_src_params, fatal=True)
-
-        encrypted_info = compat_b64decode(b64_info)
-        key = encryption_key_stem + handle.info()['x-videa-xs']
-        info_str = self.rc4(encrypted_info, key).decode('utf8')
-        info = self._parse_xml(info_str, video_id)
-
-        video = xpath_element(info, './/video', 'video', fatal=True)
-        sources = xpath_element(info, './/video_sources', 'sources', fatal=True)
-        hash_values = xpath_element(info, './/hash_values', 'hash_values', fatal=True)
+            result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
+
+        random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
+        query['_s'] = random_seed
+        query['_t'] = result[:16]
+
+        b64_info, handle = self._download_webpage_handle(
+            'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
+        if b64_info.startswith('<?xml'):
+            info = self._parse_xml(b64_info, video_id)
+        else:
+            key = result[16:] + random_seed + handle.headers['x-videa-xs']
+            info = self._parse_xml(self.rc4(
+                compat_b64decode(b64_info), key), video_id)
+
+        video = xpath_element(info, './video', 'video')
+        if not video:
+            raise ExtractorError(xpath_element(
+                info, './error', fatal=True), expected=True)
+        sources = xpath_element(
+            info, './video_sources', 'sources', fatal=True)
+        hash_values = xpath_element(
+            info, './hash_values', 'hash values', fatal=True)
 
         title = xpath_text(video, './title', fatal=True)
 
         formats = []
         for source in sources.findall('./video_source'):
             source_url = source.text
-            if not source_url:
+            source_name = source.get('name')
+            source_exp = source.get('exp')
+            if not (source_url and source_name and source_exp):
                 continue
-            source_url += '?md5=%s&expires=%s' % (hash_values.find('hash_value_%s' % source.get('name')).text, source.get('exp'))
+            hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
+            if not hash_value:
+                continue
+            source_url = update_url_query(source_url, {
+                'md5': hash_value,
+                'expires': source_exp,
+            })
             f = parse_codecs(source.get('codecs'))
             f.update({
-                'url': source_url,
+                'url': self._proto_relative_url(source_url),
                 'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
                 'format_id': source.get('name'),
                 'width': int_or_none(source.get('width')),
@@ -146,8 +156,7 @@ class VideaIE(InfoExtractor):
             formats.append(f)
         self._sort_formats(formats)
 
-        thumbnail = xpath_text(video, './poster_src')
-        duration = int_or_none(xpath_text(video, './duration'))
+        thumbnail = self._proto_relative_url(xpath_text(video, './poster_src'))
 
         age_limit = None
         is_adult = xpath_text(video, './is_adult_content', default=None)
@@ -158,7 +167,7 @@ class VideaIE(InfoExtractor):
             'id': video_id,
             'title': title,
             'thumbnail': thumbnail,
-            'duration': duration,
+            'duration': int_or_none(xpath_text(video, './duration')),
             'age_limit': age_limit,
             'formats': formats,
         }
