about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2018-05-01 01:48:21 +0700
committer Sergey M․ <dstftw@gmail.com> 2018-05-01 01:50:30 +0700
commit 67ca1a8ef7ea6094e1e34518b93cdb5ba59f31b3 (patch)
tree 646b229fe8eceb6d0dcd295ffabec5b11bbcf295
parent 4a733545867a014eb786348f8fb9e6ae95850742 (diff)
download youtube-dl-67ca1a8ef7ea6094e1e34518b93cdb5ba59f31b3.tar.xz
[zattoo] Improve and simplify (closes #14676)
-rw-r--r-- youtube_dl/extractor/zattoo.py 248
1 files changed, 142 insertions, 106 deletions
diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py
index 928f22566..773073d85 100644
--- a/youtube_dl/extractor/zattoo.py
+++ b/youtube_dl/extractor/zattoo.py
@@ -1,84 +1,82 @@
# coding: utf-8
from __future__ import unicode_literals
-from uuid import uuid4
import re
+from uuid import uuid4
from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
+ compat_HTTPError,
compat_str,
+)
+from ..utils import (
ExtractorError,
- sanitized_Request,
+ int_or_none,
+ try_get,
urlencode_postdata,
)
class ZattooBaseIE(InfoExtractor):
-
_NETRC_MACHINE = 'zattoo'
_HOST_URL = 'https://zattoo.com'
_power_guide_hash = None
- def _login(self, uuid, session_id):
+ def _login(self):
(username, password) = self._get_login_info()
if not username or not password:
- raise ExtractorError(
- 'A valid %s account is needed to access this media.' % self._NETRC_MACHINE,
- expected=True)
- login_form = {
- 'login': username,
- 'password': password,
- 'remember': True,
- }
- request = sanitized_Request(
- '%s/zapi/v2/account/login' % self._HOST_URL,
- urlencode_postdata(login_form))
- request.add_header(
- 'Referer', '%s/login' % self._HOST_URL)
- request.add_header(
- 'Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
- request.add_header(
- 'Cookie', 'uuid=%s; beaker.session.id=%s' % (uuid, session_id))
- response = self._request_webpage(
- request, None, 'Logging in')
- data = self._parse_json(response.read(), None)
- return data['session']['power_guide_hash']
-
- def _get_app_token_and_version(self):
- host_webpage = self._download_webpage(
- self._HOST_URL, None, 'Downloading %s' % self._HOST_URL)
+ self.raise_login_required(
+ 'A valid %s account is needed to access this media.'
+ % self._NETRC_MACHINE)
+
+ try:
+ data = self._download_json(
+ '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in',
+ data=urlencode_postdata({
+ 'login': username,
+ 'password': password,
+ 'remember': 'true',
+ }), headers={
+ 'Referer': '%s/login' % self._HOST_URL,
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ raise ExtractorError(
+ 'Unable to login: incorrect username and/or password',
+ expected=True)
+ raise
+
+ self._power_guide_hash = data['session']['power_guide_hash']
+
+ def _real_initialize(self):
+ webpage = self._download_webpage(
+ self._HOST_URL, None, 'Downloading app token')
app_token = self._html_search_regex(
- r'<script.+window\.appToken\s*=\s*\'(.+)\'', host_webpage, 'app token')
+ r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
+ webpage, 'app token', group='token')
app_version = self._html_search_regex(
- r'<!--\w+-(.+?)-', host_webpage, 'app version', default='2.8.2')
- return app_token, app_version
-
- def _say_hello(self, uuid, app_token, app_version):
- postdata = {
- 'client_app_token': app_token,
- 'uuid': uuid,
- 'lang': 'en',
- 'app_version': app_version,
- 'format': 'json',
- }
- request = sanitized_Request(
- '%s/zapi/v2/session/hello' % self._HOST_URL,
- urlencode_postdata(postdata))
- response = self._request_webpage(
- request, None, 'Say hello')
+ r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')
+
+ # Will setup appropriate cookies
+ self._request_webpage(
+ '%s/zapi/v2/session/hello' % self._HOST_URL, None,
+ 'Opening session', data=urlencode_postdata({
+ 'client_app_token': app_token,
+ 'uuid': compat_str(uuid4()),
+ 'lang': 'en',
+ 'app_version': app_version,
+ 'format': 'json',
+ }))
- cookie = response.headers.get('Set-Cookie')
- session_id = self._search_regex(
- r'beaker\.session\.id\s*=\s*(.+?);', cookie, 'session id')
- return session_id
+ self._login()
def _extract_cid(self, video_id, channel_name):
channel_groups = self._download_json(
'%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,
self._power_guide_hash),
- video_id,
- 'Downloading available channel list',
+ video_id, 'Downloading channel list',
query={'details': False})['channel_groups']
channel_list = []
for chgrp in channel_groups:
@@ -86,7 +84,9 @@ class ZattooBaseIE(InfoExtractor):
try:
return next(
chan['cid'] for chan in channel_list
- if chan['display_alias'] == channel_name or chan['cid'] == channel_name)
+ if chan.get('cid') and (
+ chan.get('display_alias') == channel_name or
+ chan.get('cid') == channel_name))
except StopIteration:
raise ExtractorError('Could not extract channel id')
@@ -100,72 +100,90 @@ class ZattooBaseIE(InfoExtractor):
'complete': True
})
+ p = data['program']
+ cid = p['cid']
+
info_dict = {
'id': video_id,
- 'title': data['program']['title'],
- 'description': data['program'].get('description'),
- 'thumbnail': data['program'].get('image_url')
+ 'title': p.get('title') or p['episode_title'],
+ 'description': p.get('description'),
+ 'thumbnail': p.get('image_url'),
+ 'creator': p.get('channel_name'),
+ 'episode': p.get('episode_title'),
+ 'episode_number': int_or_none(p.get('episode_number')),
+ 'season_number': int_or_none(p.get('season_number')),
+ 'release_year': int_or_none(p.get('year')),
+ 'categories': try_get(p, lambda x: x['categories'], list),
}
- cid = data['program']['cid']
+
return cid, info_dict
def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
- postdata = {
- 'stream_type': 'dash',
+ postdata_common = {
'https_watch_urls': True,
}
- if record_id:
- url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
- else:
- url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)
if is_live:
- postdata.update({'timeshift': 10800})
+ postdata_common.update({'timeshift': 10800})
url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)
-
- data = self._download_json(
- sanitized_Request(url, urlencode_postdata(postdata)),
- video_id, 'Downloading dash formats')
+ elif record_id:
+ url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
+ else:
+ url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)
formats = []
- for elem in data['stream']['watch_urls']:
- audio_channel = elem.get('audio_channel')
- maxrate = elem.get('maxrate')
- formats.extend(
- self._extract_mpd_formats(
- elem['url'], video_id,
- mpd_id='dash-maxrate-%s-channel-%s' % (maxrate, audio_channel), fatal=False))
-
- postdata.update({'stream_type': 'hls'})
- request = sanitized_Request(
- url, urlencode_postdata(postdata))
- data = self._download_json(
- request, video_id, 'Downloading hls formats')
- for elem in data['stream']['watch_urls']:
- audio_channel = elem.get('audio_channel')
- preference = None
-
- # Prefer audio channel A:
- if audio_channel == 'A':
- preference = 1
-
- maxrate = elem.get('maxrate')
- formats.extend(
- self._extract_m3u8_formats(
- elem['url'], video_id, 'mp4', entry_protocol='m3u8_native',
- preference=preference,
- m3u8_id='hls-maxrate-%s-channel-%s' % (maxrate, audio_channel),
- fatal=False))
-
+ for stream_type in ('dash', 'hls', 'hls5', 'hds'):
+ postdata = postdata_common.copy()
+ postdata['stream_type'] = stream_type
+
+ data = self._download_json(
+ url, video_id, 'Downloading %s formats' % stream_type.upper(),
+ data=urlencode_postdata(postdata), fatal=False)
+ if not data:
+ continue
+
+ watch_urls = try_get(
+ data, lambda x: x['stream']['watch_urls'], list)
+ if not watch_urls:
+ continue
+
+ for watch in watch_urls:
+ if not isinstance(watch, dict):
+ continue
+ watch_url = watch.get('url')
+ if not watch_url or not isinstance(watch_url, compat_str):
+ continue
+ format_id_list = [stream_type]
+ maxrate = watch.get('maxrate')
+ if maxrate:
+ format_id_list.append(compat_str(maxrate))
+ audio_channel = watch.get('audio_channel')
+ if audio_channel:
+ format_id_list.append(compat_str(audio_channel))
+ preference = 1 if audio_channel == 'A' else None
+ format_id = '-'.join(format_id_list)
+ if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
+ this_formats = self._extract_mpd_formats(
+ watch_url, video_id, mpd_id=format_id, fatal=False)
+ elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
+ this_formats = self._extract_m3u8_formats(
+ watch_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ fatal=False)
+ elif stream_type == 'hds':
+ this_formats = self._extract_f4m_formats(
+ watch_url, video_id, f4m_id=format_id, fatal=False)
+ elif stream_type == 'smooth_playready':
+ this_formats = self._extract_ism_formats(
+ watch_url, video_id, ism_id=format_id, fatal=False)
+ else:
+ assert False
+ for this_format in this_formats:
+ this_format['preference'] = preference
+ formats.extend(this_formats)
self._sort_formats(formats)
return formats
- def _real_initialize(self):
- uuid = compat_str(uuid4())
- app_token, app_version = self._get_app_token_and_version()
- session_id = self._say_hello(uuid, app_token, app_version)
- self._power_guide_hash = self._login(uuid, session_id)
-
def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
if is_live:
cid = self._extract_cid(video_id, channel_name)
@@ -190,13 +208,27 @@ class QuicklineBaseIE(ZattooBaseIE):
class QuicklineIE(QuicklineBaseIE):
_VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
+ 'only_matching': True,
+ }
+
def _real_extract(self, url):
channel_name, video_id = re.match(self._VALID_URL, url).groups()
return self._extract_video(channel_name, video_id)
class QuicklineLiveIE(QuicklineBaseIE):
- _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)$'
+ _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'https://mobiltv.quickline.com/watch/srf1',
+ 'only_matching': True,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url)
def _real_extract(self, url):
channel_name = video_id = self._match_id(url)
@@ -222,13 +254,17 @@ class ZattooIE(ZattooBaseIE):
class ZattooLiveIE(ZattooBaseIE):
- _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)$'
+ _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'
_TEST = {
'url': 'https://zattoo.com/watch/srf1',
'only_matching': True,
}
+ @classmethod
+ def suitable(cls, url):
+ return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url)
+
def _real_extract(self, url):
channel_name = video_id = self._match_id(url)
return self._extract_video(channel_name, video_id, is_live=True)