diff options
| author | Remita Amine <remitamine@gmail.com> | 2019-07-09 08:28:39 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2019-07-09 08:28:56 +0100 | 
| commit | 25d71fb058368e1d48c4ad9496d91d33378649f6 (patch) | |
| tree | 31760a873d4af8b8a3791f54eb636099bdccce39 /youtube_dl/extractor/packtpub.py | |
| parent | a6389abfd7fec786ed07031cd7f3a42d02910de3 (diff) | |
[packtpub] fix extraction (closes #21268)
Diffstat (limited to 'youtube_dl/extractor/packtpub.py')
| -rw-r--r-- | youtube_dl/extractor/packtpub.py | 111 | 
1 files changed, 51 insertions, 60 deletions
| diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index 1324137df..3d39d1b27 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -5,26 +5,27 @@ import re  from .common import InfoExtractor  from ..compat import ( -    compat_str, +    # compat_str,      compat_HTTPError,  )  from ..utils import (      clean_html,      ExtractorError, -    remove_end, +    # remove_end, +    str_or_none,      strip_or_none,      unified_timestamp, -    urljoin, +    # urljoin,  )  class PacktPubBaseIE(InfoExtractor): -    _PACKT_BASE = 'https://www.packtpub.com' -    _MAPT_REST = '%s/mapt-rest' % _PACKT_BASE +    # _PACKT_BASE = 'https://www.packtpub.com' +    _STATIC_PRODUCTS_BASE = 'https://static.packt-cdn.com/products/'  class PacktPubIE(PacktPubBaseIE): -    _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)' +    _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)(?:/(?P<display_id>[^/?&#]+))?'      
_TESTS = [{          'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', @@ -50,9 +51,9 @@ class PacktPubIE(PacktPubBaseIE):              return          try:              self._TOKEN = self._download_json( -                self._MAPT_REST + '/users/tokens', None, +                'https://services.packtpub.com/auth-v1/users/tokens', None,                  'Downloading Authorization Token', data=json.dumps({ -                    'email': username, +                    'username': username,                      'password': password,                  }).encode())['data']['access']          except ExtractorError as e: @@ -61,54 +62,40 @@ class PacktPubIE(PacktPubBaseIE):                  raise ExtractorError(message, expected=True)              raise -    def _handle_error(self, response): -        if response.get('status') != 'success': -            raise ExtractorError( -                '% said: %s' % (self.IE_NAME, response['message']), -                expected=True) - -    def _download_json(self, *args, **kwargs): -        response = super(PacktPubIE, self)._download_json(*args, **kwargs) -        self._handle_error(response) -        return response -      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        course_id, chapter_id, video_id = mobj.group( -            'course_id', 'chapter_id', 'id') +        course_id, chapter_id, video_id, display_id = re.match(self._VALID_URL, url).groups()          headers = {}          if self._TOKEN:              headers['Authorization'] = 'Bearer ' + self._TOKEN -        video = self._download_json( -            '%s/users/me/products/%s/chapters/%s/sections/%s' -            % (self._MAPT_REST, course_id, chapter_id, video_id), video_id, -            'Downloading JSON video', headers=headers)['data'] - -        content = video.get('content') -        if not content: -            self.raise_login_required('This video is locked') - -        
video_url = content['file'] +        try: +            video_url = self._download_json( +                'https://services.packtpub.com/products-v1/products/%s/%s/%s' % (course_id, chapter_id, video_id), video_id, +                'Downloading JSON video', headers=headers)['data'] +        except ExtractorError as e: +            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: +                self.raise_login_required('This video is locked') +            raise -        metadata = self._download_json( -            '%s/products/%s/chapters/%s/sections/%s/metadata' -            % (self._MAPT_REST, course_id, chapter_id, video_id), -            video_id)['data'] +        # TODO: find a better way to avoid duplicating course requests +        # metadata = self._download_json( +        #     '%s/products/%s/chapters/%s/sections/%s/metadata' +        #     % (self._MAPT_REST, course_id, chapter_id, video_id), +        #     video_id)['data'] -        title = metadata['pageTitle'] -        course_title = metadata.get('title') -        if course_title: -            title = remove_end(title, ' - %s' % course_title) -        timestamp = unified_timestamp(metadata.get('publicationDate')) -        thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) +        # title = metadata['pageTitle'] +        # course_title = metadata.get('title') +        # if course_title: +        #     title = remove_end(title, ' - %s' % course_title) +        # timestamp = unified_timestamp(metadata.get('publicationDate')) +        # thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))          return {              'id': video_id,              'url': video_url, -            'title': title, -            'thumbnail': thumbnail, -            'timestamp': timestamp, +            'title': display_id or video_id,  # title, +            # 'thumbnail': thumbnail, +            # 'timestamp': timestamp,          } @@ -119,6 +106,7 @@ class 
PacktPubCourseIE(PacktPubBaseIE):          'info_dict': {              'id': '9781787122215',              'title': 'Learn Nodejs by building 12 projects [Video]', +            'description': 'md5:489da8d953f416e51927b60a1c7db0aa',          },          'playlist_count': 90,      }, { @@ -136,35 +124,38 @@ class PacktPubCourseIE(PacktPubBaseIE):          url, course_id = mobj.group('url', 'id')          course = self._download_json( -            '%s/products/%s/metadata' % (self._MAPT_REST, course_id), -            course_id)['data'] +            self._STATIC_PRODUCTS_BASE + '%s/toc' % course_id, course_id) +        metadata = self._download_json( +            self._STATIC_PRODUCTS_BASE + '%s/summary' % course_id, +            course_id, fatal=False) or {}          entries = [] -        for chapter_num, chapter in enumerate(course['tableOfContents'], 1): -            if chapter.get('type') != 'chapter': -                continue -            children = chapter.get('children') -            if not isinstance(children, list): +        for chapter_num, chapter in enumerate(course['chapters'], 1): +            chapter_id = str_or_none(chapter.get('id')) +            sections = chapter.get('sections') +            if not chapter_id or not isinstance(sections, list):                  continue              chapter_info = {                  'chapter': chapter.get('title'),                  'chapter_number': chapter_num, -                'chapter_id': chapter.get('id'), +                'chapter_id': chapter_id,              } -            for section in children: -                if section.get('type') != 'section': -                    continue -                section_url = section.get('seoUrl') -                if not isinstance(section_url, compat_str): +            for section in sections: +                section_id = str_or_none(section.get('id')) +                if not section_id or section.get('contentType') != 'video':                      continue                 
 entry = {                      '_type': 'url_transparent', -                    'url': urljoin(url + '/', section_url), +                    'url': '/'.join([url, chapter_id, section_id]),                      'title': strip_or_none(section.get('title')),                      'description': clean_html(section.get('summary')), +                    'thumbnail': metadata.get('coverImage'), +                    'timestamp': unified_timestamp(metadata.get('publicationDate')),                      'ie_key': PacktPubIE.ie_key(),                  }                  entry.update(chapter_info)                  entries.append(entry) -        return self.playlist_result(entries, course_id, course.get('title')) +        return self.playlist_result( +            entries, course_id, metadata.get('title'), +            clean_html(metadata.get('about'))) | 
