from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_str,
compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
str_or_none,
str_to_int,
try_get,
unified_timestamp,
url_or_none,
)
class FourTubeBaseIE(InfoExtractor):
_TKN_HOST = 'tkn.kodicdn.com'
def _extract_formats(self, url, video_id, media_id, sources):
token_url = 'https://%s/%s/desktop/%s' % (
self._TKN_HOST, media_id, '+'.join(sources))
parsed_url = compat_urlparse.urlparse(url)
tokens = self._download_json(token_url, video_id, data=b'', headers={
'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
'Referer': url,
})
formats = [{
'url': tokens[format]['token'],
'format_id': format + 'p',
'resolution': format + 'p',
'quality': int(format),
} for format in sources]
self._sort_formats(formats)
return formats
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
if kind == 'm' or not display_id:
url = self._URL_TEMPLATE % video_id
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta('name', webpage)
timestamp = parse_iso8601(self._html_search_meta(
'uploadDate', webpage))
thumbnail = self._html_search_meta('thumbnailUrl', webpage)
uploader_id = self._html_search_regex(
r'',
webpage, 'uploader id', fatal=False)
uploader = self._html_search_regex(
r'',
webpage, 'uploader', fatal=False)
categories_html = self._search_regex(
r'(?s)>\s*Categories / Tags\s*.*?',
webpage, 'categories', fatal=False)
categories = None
if categories_html:
categories = [
c.strip() for c in re.findall(
r'(?s)(.*?)', categories_html)]
view_count = str_to_int(self._search_regex(
r']+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">',
webpage, 'view count', default=None))
like_count = str_to_int(self._search_regex(
r']+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">',
webpage, 'like count', default=None))
duration = parse_duration(self._html_search_meta('duration', webpage))
media_id = self._search_regex(
r'