1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
ExtractorError,
remove_end,
strip_or_none,
unified_timestamp,
urljoin,
)
class PacktPubBaseIE(InfoExtractor):
_PACKT_BASE = 'https://www.packtpub.com'
_MAPT_REST = '%s/mapt-rest' % _PACKT_BASE
class PacktPubIE(PacktPubBaseIE):
_VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
_TEST = {
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
'info_dict': {
'id': '20530',
'ext': 'mp4',
'title': 'Project Intro',
'thumbnail': r're:(?i)^https?://.*\.jpg',
'timestamp': 1490918400,
'upload_date': '20170331',
},
}
def _handle_error(self, response):
if response.get('status') != 'success':
raise ExtractorError(
'% said: %s' % (self.IE_NAME, response['message']),
expected=True)
def _download_json(self, *args, **kwargs):
response = super(PacktPubIE, self)._download_json(*args, **kwargs)
self._handle_error(response)
return response
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
course_id, chapter_id, video_id = mobj.group(
'course_id', 'chapter_id', 'id')
video = self._download_json(
'%s/users/me/products/%s/chapters/%s/sections/%s'
% (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
'Downloading JSON video')['data']
content = video.get('content')
if not content:
raise ExtractorError('This video is locked', expected=True)
video_url = content['file']
metadata = self._download_json(
'%s/products/%s/chapters/%s/sections/%s/metadata'
% (self._MAPT_REST, course_id, chapter_id, video_id),
video_id)['data']
title = metadata['pageTitle']
course_title = metadata.get('title')
if course_title:
title = remove_end(title, ' - %s' % course_title)
timestamp = unified_timestamp(metadata.get('publicationDate'))
thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
'timestamp': timestamp,
}
class PacktPubCourseIE(PacktPubBaseIE):
_VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
_TEST = {
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
'info_dict': {
'id': '9781787122215',
'title': 'Learn Nodejs by building 12 projects [Video]',
},
'playlist_count': 90,
}
@classmethod
def suitable(cls, url):
return False if PacktPubIE.suitable(url) else super(
PacktPubCourseIE, cls).suitable(url)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
url, course_id = mobj.group('url', 'id')
course = self._download_json(
'%s/products/%s/metadata' % (self._MAPT_REST, course_id),
course_id)['data']
entries = []
for chapter_num, chapter in enumerate(course['tableOfContents'], 1):
if chapter.get('type') != 'chapter':
continue
children = chapter.get('children')
if not isinstance(children, list):
continue
chapter_info = {
'chapter': chapter.get('title'),
'chapter_number': chapter_num,
'chapter_id': chapter.get('id'),
}
for section in children:
if section.get('type') != 'section':
continue
section_url = section.get('seoUrl')
if not isinstance(section_url, compat_str):
continue
entry = {
'_type': 'url_transparent',
'url': urljoin(url + '/', section_url),
'title': strip_or_none(section.get('title')),
'description': clean_html(section.get('summary')),
'ie_key': PacktPubIE.ie_key(),
}
entry.update(chapter_info)
entries.append(entry)
return self.playlist_result(entries, course_id, course.get('title'))
|