diff options
| -rw-r--r-- | youtube_dl/extractor/yandexdisk.py | 89 | 
1 files changed, 47 insertions, 42 deletions
| diff --git a/youtube_dl/extractor/yandexdisk.py b/youtube_dl/extractor/yandexdisk.py index 21f37c192..6fcd8ee7e 100644 --- a/youtube_dl/extractor/yandexdisk.py +++ b/youtube_dl/extractor/yandexdisk.py @@ -2,24 +2,23 @@  from __future__ import unicode_literals  import json +import re  from .common import InfoExtractor -from ..compat import compat_HTTPError  from ..utils import (      determine_ext, -    ExtractorError,      float_or_none,      int_or_none,      mimetype2ext, -    parse_iso8601, +    try_get,      urljoin,  )  class YandexDiskIE(InfoExtractor):      _VALID_URL = r'''(?x)https?:// -        (?: -            (?:www\.)?yadi\.sk| +        (?P<domain> +            yadi\.sk|              disk\.yandex\.                  (?:                      az| @@ -38,7 +37,7 @@ class YandexDiskIE(InfoExtractor):      _TESTS = [{          'url': 'https://yadi.sk/i/VdOeDou8eZs6Y', -        'md5': '33955d7ae052f15853dc41f35f17581c', +        'md5': 'a4a8d52958c8fddcf9845935070402ae',          'info_dict': {              'id': 'VdOeDou8eZs6Y',              'ext': 'mp4', @@ -46,10 +45,9 @@ class YandexDiskIE(InfoExtractor):              'duration': 168.6,              'uploader': 'y.botova',              'uploader_id': '300043621', -            'timestamp': 1421396809, -            'upload_date': '20150116',              'view_count': int,          }, +        'expected_warnings': ['Unable to download JSON metadata'],      }, {          'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',          'only_matching': True, @@ -59,51 +57,58 @@ class YandexDiskIE(InfoExtractor):      }]      def _real_extract(self, url): -        video_id = self._match_id(url) +        domain, video_id = re.match(self._VALID_URL, url).groups() -        try: -            resource = self._download_json( -                'https://cloud-api.yandex.net/v1/disk/public/resources', -                video_id, query={'public_key': url}) -        except ExtractorError as e: -            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: -                error_description = self._parse_json( -                    e.cause.read().decode(), video_id)['description'] -                raise ExtractorError(error_description, expected=True) -            raise +        webpage = self._download_webpage(url, video_id) +        store = self._parse_json(self._search_regex( +            r'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>', +            webpage, 'store'), video_id) +        resource = store['resources'][store['rootResourceId']]          title = resource['name'] -        public_url = resource.get('public_url') +        meta = resource.get('meta') or {} + +        public_url = meta.get('short_url')          if public_url:              video_id = self._match_id(public_url) -        self._set_cookie('yadi.sk', 'yandexuid', '0') +        source_url = (self._download_json( +            'https://cloud-api.yandex.net/v1/disk/public/resources/download', +            video_id, query={'public_key': url}, fatal=False) or {}).get('href') +        video_streams = resource.get('videoStreams') or {} +        video_hash = resource.get('hash') or url +        environment = store.get('environment') or {} +        sk = environment.get('sk') +        yandexuid = environment.get('yandexuid') +        if sk and yandexuid and not (source_url and video_streams): +            self._set_cookie(domain, 'yandexuid', yandexuid) -        def call_api(action): -            return (self._download_json( -                urljoin(url, '/public/api/') + action, video_id, data=json.dumps({ -                    'hash': url, -                    # obtain sk if needed from call_api('check-auth') while -                    # the yandexuid cookie is set and sending an empty JSON object -                    'sk': 'ya6b52f8c6b12abe91a66d22d3a31084b' -                }).encode(), headers={ -                    'Content-Type': 'text/plain', -                }, fatal=False) or {}).get('data') or {} +            def call_api(action): +                return (self._download_json( +                    urljoin(url, '/public/api/') + action, video_id, data=json.dumps({ +                        'hash': video_hash, +                        'sk': sk, +                    }).encode(), headers={ +                        'Content-Type': 'text/plain', +                    }, fatal=False) or {}).get('data') or {} +            if not source_url: +                # TODO: figure out how to detect if download limit has +                # been reached and then avoid unnecessary source format +                # extraction requests +                source_url = call_api('download-url').get('url') +            if not video_streams: +                video_streams = call_api('get-video-streams')          formats = [] -        source_url = resource.get('file') -        if not source_url: -            source_url = call_api('download-url').get('url')          if source_url:              formats.append({                  'url': source_url,                  'format_id': 'source', -                'ext': determine_ext(title, mimetype2ext(resource.get('mime_type')) or 'mp4'), +                'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'),                  'quality': 1, -                'filesize': int_or_none(resource.get('size')) +                'filesize': int_or_none(meta.get('size'))              }) -        video_streams = call_api('get-video-streams')          for video in (video_streams.get('videos') or []):              format_url = video.get('url')              if not format_url: @@ -128,15 +133,15 @@ class YandexDiskIE(InfoExtractor):                  })          self._sort_formats(formats) -        owner = resource.get('owner') or {} +        uid = resource.get('uid') +        display_name = try_get(store, lambda x: x['users'][uid]['displayName'])          return {              'id': video_id,              'title': title,              'duration': float_or_none(video_streams.get('duration'), 1000), -            'uploader': owner.get('display_name'), -            'uploader_id': owner.get('uid'), -            'view_count': int_or_none(resource.get('views_count')), -            'timestamp': parse_iso8601(resource.get('created')), +            'uploader': display_name, +            'uploader_id': uid, +            'view_count': int_or_none(meta.get('views_counter')),              'formats': formats,          } | 
