diff options
author | Adrian Heine né Lang <mail@adrianheine.de> | 2021-01-12 21:12:44 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-12 20:12:44 +0000 |
commit | a938f111ed2955dc5304d11c301bfa28df585fd3 (patch) | |
tree | 211abea19a2b71b0bc0f1360469ffd6c918a656c /youtube_dl/extractor | |
parent | 4759543f6e5d532795eb1d5434692bb6d5e1f0ec (diff) |
[ADN] Fix extraction (#27732)
Closes #26963.
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/adn.py | 145 |
1 file changed, 86 insertions, 59 deletions
diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index c95ad2173..901832ac4 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -10,6 +10,7 @@ import random from .common import InfoExtractor from ..aes import aes_cbc_decrypt from ..compat import ( + compat_HTTPError, compat_b64decode, compat_ord, ) @@ -18,10 +19,12 @@ from ..utils import ( bytes_to_long, ExtractorError, float_or_none, + int_or_none, intlist_to_bytes, long_to_bytes, pkcs1pad, strip_or_none, + unified_strdate, urljoin, ) @@ -31,16 +34,18 @@ class ADNIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', - 'md5': 'e497370d847fd79d9d4c74be55575c7a', + 'md5': '0319c99885ff5547565cacb4f3f9348d', 'info_dict': { 'id': '7778', 'ext': 'mp4', - 'title': 'Blue Exorcist - Kyôto Saga - Épisode 1', + 'title': 'Blue Exorcist - Kyôto Saga - Episode 1', 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', } } + _BASE_URL = 'http://animedigitalnetwork.fr' - _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537) + _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr' + _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537) _POS_ALIGN_MAP = { 'start': 1, 'end': 3, @@ -119,59 +124,75 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) 
- player_config = self._parse_json(self._search_regex( - r'playerConfig\s*=\s*({.+});', webpage, - 'player config', default='{}'), video_id, fatal=False) - if not player_config: - config_url = urljoin(self._BASE_URL, self._search_regex( - r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"', - webpage, 'config url')) - player_config = self._download_json( - config_url, video_id, - 'Downloading player config JSON metadata')['player'] - - video_info = {} - video_info_str = self._search_regex( - r'videoInfo\s*=\s*({.+});', webpage, - 'video info', fatal=False) - if video_info_str: - video_info = self._parse_json( - video_info_str, video_id, fatal=False) or {} - - options = player_config.get('options') or {} - metas = options.get('metas') or {} - links = player_config.get('links') or {} - sub_path = player_config.get('subtitles') - error = None - if not links: - links_url = player_config.get('linksurl') or options['videoUrl'] - token = options['token'] - self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)]) - message = bytes_to_intlist(json.dumps({ - 'k': self._K, - 'e': 60, - 't': token, - })) + config_url = self._API_BASE_URL + '/player/video/%s/configuration' % video_id + player_config = self._download_json( + config_url, video_id, + 'Downloading player config JSON metadata')['player']['options'] + + user = player_config['user'] + if not user.get('hasAccess'): + raise ExtractorError('This video is only available for paying users') + # self.raise_login_required() # FIXME: Login is not implemented + + token = self._download_json( + user.get('refreshTokenUrl') or (self._API_BASE_URL + '/player/refresh/token'), + video_id, 'Downloading access token', headers={'x-player-refresh-token': user['refreshToken']}, + data=b'')['token'] + + links_url = player_config.get('videoUrl') or (self._API_BASE_URL + '/player/video/%s/link' % video_id) + self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)]) + message = 
bytes_to_intlist(json.dumps({ + 'k': self._K, + 't': token, + })) + + # Sometimes authentication fails for no good reason, retry with + # a different random padding + links_data = None + for _ in range(3): padded_message = intlist_to_bytes(pkcs1pad(message, 128)) n, e = self._RSA_KEY encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) authorization = base64.b64encode(encrypted_message).decode() - links_data = self._download_json( - urljoin(self._BASE_URL, links_url), video_id, - 'Downloading links JSON metadata', headers={ - 'Authorization': 'Bearer ' + authorization, - }) - links = links_data.get('links') or {} - metas = metas or links_data.get('meta') or {} - sub_path = sub_path or links_data.get('subtitles') or \ - 'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id - sub_path += '&token=' + token - error = links_data.get('error') - title = metas.get('title') or video_info['title'] + + try: + links_data = self._download_json( + urljoin(self._BASE_URL, links_url), video_id, + 'Downloading links JSON metadata', headers={ + 'X-Player-Token': authorization + }, + query={ + 'freeWithAds': 'true', + 'adaptive': 'false', + 'withMetadata': 'true', + 'source': 'Web' + } + ) + break + except ExtractorError as e: + if not isinstance(e.cause, compat_HTTPError): + raise e + + if e.cause.code == 401: + # This usually goes away with a different random pkcs1pad, so retry + continue + + error = self._parse_json(e.cause.read(), video_id) + message = error.get('message') + if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country': + self.raise_geo_restricted(msg=message) + else: + raise ExtractorError(message) + else: + raise ExtractorError('Giving up retrying') + + links = links_data.get('links') or {} + metas = links_data.get('metadata') or {} + sub_path = (links.get('subtitles') or {}).get('all') + video_info = links_data.get('video') or {} formats = [] - for format_id, qualities in links.items(): + for 
format_id, qualities in (links.get('streaming') or {}).items(): if not isinstance(qualities, dict): continue for quality, load_balancer_url in qualities.items(): @@ -189,19 +210,25 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' for f in m3u8_formats: f['language'] = 'fr' formats.extend(m3u8_formats) - if not error: - error = options.get('error') - if not formats and error: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) self._sort_formats(formats) + video = (self._download_json(self._API_BASE_URL + '/video/%s' % video_id, video_id, + 'Downloading additional video metadata', fatal=False) or {}).get('video') + show = video.get('show') or {} + return { 'id': video_id, - 'title': title, - 'description': strip_or_none(metas.get('summary') or video_info.get('resume')), + 'title': metas.get('title') or video_id, + 'description': strip_or_none(metas.get('summary') or video.get('summary')), 'thumbnail': video_info.get('image'), 'formats': formats, - 'subtitles': self.extract_subtitles(sub_path, video_id), - 'episode': metas.get('subtitle') or video_info.get('videoTitle'), - 'series': video_info.get('playlistTitle'), + 'subtitles': sub_path and self.extract_subtitles(sub_path, video_id), + 'episode': metas.get('subtitle') or video.get('name'), + 'episode_number': int_or_none(video.get('shortNumber')), + 'series': video_info.get('playlistTitle') or show.get('title'), + 'season_number': int_or_none(video.get('season')), + 'duration': int_or_none(video_info.get('duration') or video.get('duration')), + 'release_date': unified_strdate(video.get('release_date')), + 'average_rating': video.get('rating') or metas.get('rating'), + 'comment_count': int_or_none(video.get('commentsCount')), } |