diff options
| author | Remita Amine <remitamine@gmail.com> | 2016-07-12 23:15:38 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2016-07-12 23:15:38 +0100 | 
| commit | 41aa44259d3a0791b1a023a18c9a933f71e04c50 (patch) | |
| tree | 4bf4e5f3f5215e8aff2ff97ae4d8026caf1be52c | |
| parent | 381ff44756ecf188de476a7a4cc9d4becf6633d1 (diff) | |
[shahid] try to bypass geo restriction and extract more metadata(closes #10062)
| -rw-r--r-- | youtube_dl/extractor/shahid.py | 75 | 
1 files changed, 26 insertions, 49 deletions
```diff
diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py
index d95ea06be..ca286abb1 100644
--- a/youtube_dl/extractor/shahid.py
+++ b/youtube_dl/extractor/shahid.py
@@ -2,11 +2,11 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlencode
 from ..utils import (
     ExtractorError,
     int_or_none,
     parse_iso8601,
+    str_or_none,
 )
 
 
@@ -33,45 +33,27 @@ class ShahidIE(InfoExtractor):
         'only_matching': True
     }]
 
-    def _handle_error(self, response):
-        if not isinstance(response, dict):
-            return
-        error = response.get('error')
+    def _call_api(self, path, video_id, note):
+        data = self._download_json(
+            'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
+                'apiKey': 'sh@hid0nlin3',
+                'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
+            }).get('data', {})
+
+        error = data.get('error')
         if error:
             raise ExtractorError(
                 '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
                 expected=True)
 
-    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
-        response = super(ShahidIE, self)._download_json(url, video_id, note)['data']
-        self._handle_error(response)
-        return response
+        return data
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
-
-        api_vars = {
-            'id': video_id,
-            'type': 'player',
-            'url': 'http://api.shahid.net/api/v1_1',
-            'playerType': 'episode',
-        }
-
-        flashvars = self._search_regex(
-            r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None)
-        if flashvars:
-            for key in api_vars.keys():
-                value = self._search_regex(
-                    r'\b%s\s*:\s*(?P<q>["\'])(?P<value>.+?)(?P=q)' % key,
-                    flashvars, 'type', default=None, group='value')
-                if value:
-                    api_vars[key] = value
-
-        player = self._download_json(
-            'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html'
-            % (video_id, api_vars['type']), video_id, 'Downloading player JSON')
+        player = self._call_api(
+            'Content/Episode/%s' % video_id,
+            video_id, 'Downloading player JSON')
 
         if player.get('drm'):
             raise ExtractorError('This video is DRM protected.', expected=True)
@@ -79,22 +61,11 @@ class ShahidIE(InfoExtractor):
         formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
         self._sort_formats(formats)
 
-        video = self._download_json(
-            '%s/%s/%s?%s' % (
-                api_vars['url'], api_vars['playerType'], api_vars['id'],
-                compat_urllib_parse_urlencode({
-                    'apiKey': 'sh@hid0nlin3',
-                    'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
-                })),
-            video_id, 'Downloading video JSON')
-
-        video = video[api_vars['playerType']]
+        video = self._call_api(
+            'episode/%s' % video_id, video_id,
+            'Downloading video JSON')['episode']
 
         title = video['title']
-        description = video.get('description')
-        thumbnail = video.get('thumbnailUrl')
-        duration = int_or_none(video.get('duration'))
-        timestamp = parse_iso8601(video.get('referenceDate'))
         categories = [
             category['name']
             for category in video.get('genres', []) if 'name' in category]
@@ -102,10 +73,16 @@ class ShahidIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'timestamp': timestamp,
+            'description': video.get('description'),
+            'thumbnail': video.get('thumbnailUrl'),
+            'duration': int_or_none(video.get('duration')),
+            'timestamp': parse_iso8601(video.get('referenceDate')),
             'categories': categories,
+            'series': video.get('showTitle') or video.get('showName'),
+            'season': video.get('seasonTitle'),
+            'season_number': int_or_none(video.get('seasonNumber')),
+            'season_id': str_or_none(video.get('seasonId')),
+            'episode_number': int_or_none(video.get('number')),
+            'episode_id': video_id,
             'formats': formats,
         }
```
