aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2018-06-29 00:35:05 +0700
committerSergey M․ <dstftw@gmail.com>2018-06-29 00:35:05 +0700
commitacbd0ff5df5ff9d69e6707ea4fa3e3b4f9cc6528 (patch)
tree007b2cb8948cb5ce5272bccacd54a2a123ada4a4
parent7b393f9cc5dc4790bcb623c768fa4a3046ef80bf (diff)
[dctptv] Restore extraction based on REST API (closes #16850)
-rw-r--r--youtube_dl/extractor/dctp.py80
1 files changed, 54 insertions, 26 deletions
diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py
index 3a6d0560e..dc0c41b8a 100644
--- a/youtube_dl/extractor/dctp.py
+++ b/youtube_dl/extractor/dctp.py
@@ -5,13 +5,15 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
- unified_strdate,
+ int_or_none,
+ unified_timestamp,
)
class DctpTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
- _TEST = {
+ _TESTS = [{
+ # 4x3
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
@@ -19,31 +21,49 @@ class DctpTvIE(InfoExtractor):
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
- 'upload_date': '20110407',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 71.24,
+ 'timestamp': 1302172322,
+ 'upload_date': '20110407',
},
'params': {
# rtmp download
'skip_download': True,
},
- }
+ }, {
+ # 16x9
+ 'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
+ 'only_matching': True,
+ }]
+
+ _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
def _real_extract(self, url):
display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
+ version = self._download_json(
+ '%s/version.json' % self._BASE_URL, display_id,
+ 'Downloading version JSON')
+
+ restapi_base = '%s/%s/restapi' % (
+ self._BASE_URL, version['version_name'])
- video_id = self._html_search_meta(
- 'DC.identifier', webpage, 'video id',
- default=None) or self._search_regex(
- r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
+ info = self._download_json(
+ '%s/slugs/%s.json' % (restapi_base, display_id), display_id,
+ 'Downloading video info JSON')
- title = self._og_search_title(webpage)
+ media = self._download_json(
+ '%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
+ display_id, 'Downloading media JSON')
+
+ uuid = media['uuid']
+ title = media['title']
+ ratio = '16x9' if media.get('is_wide') else '4x3'
+ play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
servers = self._download_json(
'http://www.dctp.tv/streaming_servers/', display_id,
- note='Downloading server list', fatal=False)
+ note='Downloading server list JSON', fatal=False)
if servers:
endpoint = next(
@@ -60,27 +80,35 @@ class DctpTvIE(InfoExtractor):
formats = [{
'url': endpoint,
'app': app,
- 'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
+ 'play_path': play_path,
'page_url': url,
- 'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
+ 'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
'ext': 'flv',
}]
- description = self._html_search_meta('DC.description', webpage)
- upload_date = unified_strdate(
- self._html_search_meta('DC.date.created', webpage))
- thumbnail = self._og_search_thumbnail(webpage)
- duration = float_or_none(self._search_regex(
- r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
- default=None), scale=1000)
+ thumbnails = []
+ images = media.get('images')
+ if isinstance(images, list):
+ for image in images:
+ if not isinstance(image, dict):
+ continue
+ image_url = image.get('url')
+ if not image_url or not isinstance(image_url, compat_str):
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
return {
- 'id': video_id,
+ 'id': uuid,
+ 'display_id': display_id,
'title': title,
+ 'alt_title': media.get('subtitle'),
+ 'description': media.get('description') or media.get('teaser'),
+ 'timestamp': unified_timestamp(media.get('created')),
+ 'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
+ 'thumbnails': thumbnails,
'formats': formats,
- 'display_id': display_id,
- 'description': description,
- 'upload_date': upload_date,
- 'thumbnail': thumbnail,
- 'duration': duration,
}