diff options
Diffstat (limited to 'youtube_dl/extractor/shahid.py')
| -rw-r--r-- | youtube_dl/extractor/shahid.py | 164 | 
1 files changed, 121 insertions, 43 deletions
| diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 374f7faf9..5c2a6206b 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -1,22 +1,53 @@  # coding: utf-8  from __future__ import unicode_literals -import re  import json +import math +import re -from .common import InfoExtractor +from .aws import AWSIE  from ..compat import compat_HTTPError  from ..utils import ( +    clean_html,      ExtractorError, +    InAdvancePagedList,      int_or_none,      parse_iso8601,      str_or_none,      urlencode_postdata, -    clean_html,  ) -class ShahidIE(InfoExtractor): +class ShahidBaseIE(AWSIE): +    _AWS_PROXY_HOST = 'api2.shahid.net' +    _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh' + +    def _handle_error(self, e): +        fail_data = self._parse_json( +            e.cause.read().decode('utf-8'), None, fatal=False) +        if fail_data: +            faults = fail_data.get('faults', []) +            faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) +            if faults_message: +                raise ExtractorError(faults_message, expected=True) + +    def _call_api(self, path, video_id, request=None): +        query = {} +        if request: +            query['request'] = json.dumps(request) +        try: +            return self._aws_execute_api({ +                'uri': '/proxy/v2/' + path, +                'access_key': 'AKIAI6X4TYCIXM2B7MUQ', +                'secret_key': '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', +            }, video_id, query) +        except ExtractorError as e: +            if isinstance(e.cause, compat_HTTPError): +                self._handle_error(e) +            raise + + +class ShahidIE(ShahidBaseIE):      _NETRC_MACHINE = 'shahid'      _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'      _TESTS = [{ @@ -41,34 +72,25 @@ class ShahidIE(InfoExtractor):          'only_matching': True      }] -    def _api2_request(self, *args, **kwargs): -        try: -            return self._download_json(*args, **kwargs) -        except ExtractorError as e: -            if isinstance(e.cause, compat_HTTPError): -                fail_data = self._parse_json( -                    e.cause.read().decode('utf-8'), None, fatal=False) -                if fail_data: -                    faults = fail_data.get('faults', []) -                    faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) -                    if faults_message: -                        raise ExtractorError(faults_message, expected=True) -            raise -      def _real_initialize(self):          email, password = self._get_login_info()          if email is None:              return -        user_data = self._api2_request( -            'https://shahid.mbc.net/wd/service/users/login', -            None, 'Logging in', data=json.dumps({ -                'email': email, -                'password': password, -                'basic': 'false', -            }).encode('utf-8'), headers={ -                'Content-Type': 'application/json; charset=UTF-8', -            })['user'] +        try: +            user_data = self._download_json( +                'https://shahid.mbc.net/wd/service/users/login', +                None, 'Logging in', data=json.dumps({ +                    'email': email, +                    'password': password, +                    'basic': 'false', +                }).encode('utf-8'), headers={ +                    'Content-Type': 'application/json; charset=UTF-8', +                })['user'] +        except ExtractorError as e: +            if isinstance(e.cause, compat_HTTPError): +                self._handle_error(e) +            raise          self._download_webpage(              'https://shahid.mbc.net/populateContext', @@ -81,25 +103,13 @@ class ShahidIE(InfoExtractor):                  'sessionId': user_data['sessionId'],              })) -    def _get_api_data(self, response): -        data = response.get('data', {}) - -        error = data.get('error') -        if error: -            raise ExtractorError( -                '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), -                expected=True) - -        return data -      def _real_extract(self, url):          page_type, video_id = re.match(self._VALID_URL, url).groups()          if page_type == 'clip':              page_type = 'episode' -        playout = self._api2_request( -            'https://api2.shahid.net/proxy/v2/playout/url/' + video_id, -            video_id, 'Downloading player JSON')['playout'] +        playout = self._call_api( +            'playout/url/' + video_id, video_id)['playout']          if playout.get('drm'):              raise ExtractorError('This video is DRM protected.', expected=True) @@ -107,13 +117,27 @@ class ShahidIE(InfoExtractor):          formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')          self._sort_formats(formats) -        video = self._get_api_data(self._download_json( +        # video = self._call_api( +        #     'product/id', video_id, { +        #         'id': video_id, +        #         'productType': 'ASSET', +        #         'productSubType': page_type.upper() +        #     })['productModel'] + +        response = self._download_json(              'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),              video_id, 'Downloading video JSON', query={                  'apiKey': 'sh@hid0nlin3',                  'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', -            }))[page_type] +            }) +        data = response.get('data', {}) +        error = data.get('error') +        if error: +            raise ExtractorError( +                '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), +                expected=True) +        video = data[page_type]          title = video['title']          categories = [              category['name'] @@ -135,3 +159,57 @@ class ShahidIE(InfoExtractor):              'episode_id': video_id,              'formats': formats,          } + + +class ShahidShowIE(ShahidBaseIE): +    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)' +    _TESTS = [{ +        'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187', +        'info_dict': { +            'id': '79187', +            'title': 'رامز قرش البحر', +            'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff', +        }, +        'playlist_mincount': 32, +    }, { +        'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861', +        'only_matching': True +    }] +    _PAGE_SIZE = 30 + +    def _real_extract(self, url): +        show_id = self._match_id(url) + +        product = self._call_api( +            'playableAsset', show_id, {'showId': show_id})['productModel'] +        playlist = product['playlist'] +        playlist_id = playlist['id'] +        show = product.get('show', {}) + +        def page_func(page_num): +            playlist = self._call_api( +                'product/playlist', show_id, { +                    'playListId': playlist_id, +                    'pageNumber': page_num, +                    'pageSize': 30, +                    'sorts': [{ +                        'order': 'DESC', +                        'type': 'SORTDATE' +                    }], +                }) +            for product in playlist.get('productList', {}).get('products', []): +                product_url = product.get('productUrl', []).get('url') +                if not product_url: +                    continue +                yield self.url_result( +                    product_url, 'Shahid', +                    str_or_none(product.get('id')), +                    product.get('title')) + +        entries = InAdvancePagedList( +            page_func, +            math.ceil(playlist['count'] / self._PAGE_SIZE), +            self._PAGE_SIZE) + +        return self.playlist_result( +            entries, show_id, show.get('title'), show.get('description')) | 
