diff options
| author | Sergey M․ <dstftw@gmail.com> | 2018-03-08 03:16:21 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2018-03-08 03:16:21 +0700 | 
| commit | d91dd0ce199a818fb12b28cb777d4453e7e829f4 (patch) | |
| tree | 2b329718602c0cfb3c5593bc22a4c56e94cf5b77 | |
| parent | 6202f08e1bb4621873cf12d471cd566c4d5cfbc8 (diff) | |
[nexx] Restore reverse engineered approach
| -rw-r--r-- | youtube_dl/extractor/nexx.py | 117 | 
1 file changed, 112 insertions, 5 deletions
| diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index 0a8d844d4..117dfe826 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -1,22 +1,27 @@  # coding: utf-8  from __future__ import unicode_literals +import hashlib +import random  import re +import time  from .common import InfoExtractor  from ..compat import compat_str  from ..utils import ( +    ExtractorError,      int_or_none,      parse_duration,      try_get, +    urlencode_postdata,  )  class NexxIE(InfoExtractor):      _VALID_URL = r'''(?x)                          (?: -                            https?://api\.nexx(?:\.cloud|cdn\.com)/v3/\d+/videos/byid/| -                            nexx:(?:\d+:)?| +                            https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/| +                            nexx:(?P<domain_id_s>\d+)?:|                              https?://arc\.nexx\.cloud/api/video/                          )                          (?P<id>\d+) @@ -58,6 +63,21 @@ class NexxIE(InfoExtractor):              'skip_download': True,          },      }, { +        # does not work via arc +        'url': 'nexx:741:1269984', +        'md5': 'c714b5b238b2958dc8d5642addba6886', +        'info_dict': { +            'id': '1269984', +            'ext': 'mp4', +            'title': '1 TAG ohne KLO... wortwörtlich! 😑', +            'alt_title': '1 TAG ohne KLO... wortwörtlich! 
😑', +            'description': 'md5:4604539793c49eda9443ab5c5b1d612f', +            'thumbnail': r're:^https?://.*\.jpg$', +            'duration': 607, +            'timestamp': 1518614955, +            'upload_date': '20180214', +        }, +    }, {          'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',          'only_matching': True,      }, { @@ -103,12 +123,99 @@ class NexxIE(InfoExtractor):      def _extract_url(webpage):          return NexxIE._extract_urls(webpage)[0] +    def _handle_error(self, response): +        status = int_or_none(try_get( +            response, lambda x: x['metadata']['status']) or 200) +        if 200 <= status < 300: +            return +        raise ExtractorError( +            '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']), +            expected=True) + +    def _call_api(self, domain_id, path, video_id, data=None, headers={}): +        headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8' +        result = self._download_json( +            'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id, +            'Downloading %s JSON' % path, data=urlencode_postdata(data), +            headers=headers) +        self._handle_error(result) +        return result['result'] +      def _real_extract(self, url): -        video_id = self._match_id(url) +        mobj = re.match(self._VALID_URL, url) +        domain_id = mobj.group('domain_id') or mobj.group('domain_id_s') +        video_id = mobj.group('id') + +        video = None -        video = self._download_json( +        response = self._download_json(              'https://arc.nexx.cloud/api/video/%s.json' % video_id, -            video_id)['result'] +            video_id, fatal=False) +        if response and isinstance(response, dict): +            result = response.get('result') +            if result and isinstance(result, dict): +                video = result + +        # not all videos work via arc, e.g. 
nexx:741:1269984 +        if not video: +            # Reverse engineered from JS code (see getDeviceID function) +            device_id = '%d:%d:%d%d' % ( +                random.randint(1, 4), int(time.time()), +                random.randint(1e4, 99999), random.randint(1, 9)) + +            result = self._call_api(domain_id, 'session/init', video_id, data={ +                'nxp_devh': device_id, +                'nxp_userh': '', +                'precid': '0', +                'playlicense': '0', +                'screenx': '1920', +                'screeny': '1080', +                'playerversion': '6.0.00', +                'gateway': 'html5', +                'adGateway': '', +                'explicitlanguage': 'en-US', +                'addTextTemplates': '1', +                'addDomainData': '1', +                'addAdModel': '1', +            }, headers={ +                'X-Request-Enable-Auth-Fallback': '1', +            }) + +            cid = result['general']['cid'] + +            # As described in [1] X-Request-Token generation algorithm is +            # as follows: +            #   md5( operation + domain_id + domain_secret ) +            # where domain_secret is a static value that will be given by nexx.tv +            # as per [1]. Here is how this "secret" is generated (reversed +            # from _play.api.init function, search for clienttoken). So it's +            # actually not static and not that much of a secret. +            # 1. 
https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf +            secret = result['device']['clienttoken'][int(device_id[0]):] +            secret = secret[0:len(secret) - int(device_id[-1])] + +            op = 'byid' + +            # Reversed from JS code for _play.api.call function (search for +            # X-Request-Token) +            request_token = hashlib.md5( +                ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest() + +            video = self._call_api( +                domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={ +                    'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description', +                    'addInteractionOptions': '1', +                    'addStatusDetails': '1', +                    'addStreamDetails': '1', +                    'addCaptions': '1', +                    'addScenes': '1', +                    'addHotSpots': '1', +                    'addBumpers': '1', +                    'captionFormat': 'data', +                }, headers={ +                    'X-Request-CID': cid, +                    'X-Request-Token': request_token, +                })          general = video['general']          title = general['title'] | 
