diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-04-01 00:25:27 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-04-01 00:25:27 +0700 | 
| commit | 7453999580f2809153a84420d3ca72b24186c02b (patch) | |
| tree | bd85e93c6400379fc33bd93d30a6b3bd8ba35d64 | |
| parent | 1640eb096166c81918125a0a7462eb2edb063167 (diff) | |
[packtpub] Add extractor (closes #12610)
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/packtpub.py | 138 | 
2 files changed, 142 insertions, 0 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 43933ad5b..6ad7444fe 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -729,6 +729,10 @@ from .orf import (
     ORFFM4IE,
     ORFIPTVIE,
 )
+from .packtpub import (
+    PacktPubIE,
+    PacktPubCourseIE,
+)
 from .pandatv import PandaTVIE
 from .pandoratv import PandoraTVIE
 from .parliamentliveuk import ParliamentLiveUKIE
diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py
new file mode 100644
index 000000000..881f3bcc7
--- /dev/null
+++ b/youtube_dl/extractor/packtpub.py
@@ -0,0 +1,138 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    clean_html,
+    ExtractorError,
+    remove_end,
+    strip_or_none,
+    unified_timestamp,
+    urljoin,
+)
+
+
+class PacktPubBaseIE(InfoExtractor):
+    _PACKT_BASE = 'https://www.packtpub.com'
+    _MAPT_REST = '%s/mapt-rest' % _PACKT_BASE
+
+
+class PacktPubIE(PacktPubBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
+        'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
+        'info_dict': {
+            'id': '20530',
+            'ext': 'mp4',
+            'title': 'Project Intro',
+            'thumbnail': r're:(?i)^https?://.*\.jpg',
+            'timestamp': 1490918400,
+            'upload_date': '20170331',
+        },
+    }
+
+    def _handle_error(self, response):  # raise unless the response status is 'success'
+        if response.get('status') != 'success':
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, response['message']),
+                expected=True)
+
+    def _download_json(self, *args, **kwargs):  # parent download plus status check
+        response = super(PacktPubIE, self)._download_json(*args, **kwargs)
+        self._handle_error(response)
+        return response
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        course_id, chapter_id, video_id = mobj.group(
+            'course_id', 'chapter_id', 'id')
+
+        video = self._download_json(
+            '%s/users/me/products/%s/chapters/%s/sections/%s'
+            % (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
+            'Downloading JSON video')['data']
+
+        content = video.get('content')
+        if not content:
+            raise ExtractorError('This video is locked', expected=True)
+
+        video_url = content['file']
+
+        metadata = self._download_json(
+            '%s/products/%s/chapters/%s/sections/%s/metadata'
+            % (self._MAPT_REST, course_id, chapter_id, video_id),
+            video_id)['data']
+
+        title = metadata['pageTitle']
+        course_title = metadata.get('title')
+        if course_title:
+            title = remove_end(title, ' - %s' % course_title)
+        timestamp = unified_timestamp(metadata.get('publicationDate'))
+        thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+        }
+
+
+class PacktPubCourseIE(PacktPubBaseIE):
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
+    _TEST = {
+        'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
+        'info_dict': {
+            'id': '9781787122215',
+            'title': 'Learn Nodejs by building 12 projects [Video]',
+        },
+        'playlist_count': 90,
+    }
+
+    @classmethod
+    def suitable(cls, url):  # reject URLs that PacktPubIE already accepts
+        return False if PacktPubIE.suitable(url) else super(
+            PacktPubCourseIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        url, course_id = mobj.group('url', 'id')
+
+        course = self._download_json(
+            '%s/products/%s/metadata' % (self._MAPT_REST, course_id),
+            course_id)['data']
+
+        entries = []
+        for chapter_num, chapter in enumerate(course['tableOfContents'], 1):
+            if chapter.get('type') != 'chapter':
+                continue
+            children = chapter.get('children')
+            if not isinstance(children, list):
+                continue
+            chapter_info = {
+                'chapter': chapter.get('title'),
+                'chapter_number': chapter_num,
+                'chapter_id': chapter.get('id'),
+            }
+            for section in children:
+                if section.get('type') != 'section':
+                    continue
+                section_url = section.get('seoUrl')
+                if not isinstance(section_url, compat_str):
+                    continue
+                entry = {
+                    '_type': 'url_transparent',
+                    'url': urljoin(url + '/', section_url),
+                    'title': strip_or_none(section.get('title')),
+                    'description': clean_html(section.get('summary')),
+                    'ie_key': PacktPubIE.ie_key(),
+                }
+                entry.update(chapter_info)
+                entries.append(entry)
+
+        return self.playlist_result(entries, course_id, course.get('title'))
