about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2018-04-16 00:31:25 +0700
committer Sergey M․ <dstftw@gmail.com> 2018-04-16 00:34:47 +0700
commit a42839e548d81ae20e5164ae690075d2c423477e (patch)
tree 419ae879ea2c7d771205b5fcb0edd7408ef3a674
parent d6166a7602f5b78a4bb552ba0f4b176cbc0a4a03 (diff)
[picarto] Improve extraction (closes #6205, closes #12514, closes #15276, closes #15551)
-rw-r--r-- youtube_dl/extractor/extractors.py 2
-rwxr-xr-x youtube_dl/extractor/picarto.py 152
2 files changed, 116 insertions, 38 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d83e93dec..3570fa165 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -816,8 +816,8 @@ from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .picarto import (
- PicartoVodIE,
PicartoIE,
+ PicartoVodIE,
)
from .piksel import PikselIE
from .pinkbike import PinkbikeIE
diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py
index 1d6f714ed..2366dfb34 100755
--- a/youtube_dl/extractor/picarto.py
+++ b/youtube_dl/extractor/picarto.py
@@ -1,12 +1,21 @@
# coding: utf-8
from __future__ import unicode_literals
+import time
+
from .common import InfoExtractor
-from ..utils import ExtractorError, js_to_json, urlencode_postdata
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+ try_get,
+ update_url_query,
+ urlencode_postdata,
+)
class PicartoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)[^/]*$'
+ _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
_TEST = {
'url': 'https://picarto.tv/Setz',
'info_dict': {
@@ -16,72 +25,141 @@ class PicartoIE(InfoExtractor):
'timestamp': int,
'is_live': True
},
- 'params': {
- 'skip_download': True
- }
+ 'skip': 'Stream is offline',
}
+ @classmethod
+ def suitable(cls, url):
+ return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
+
def _real_extract(self, url):
channel_id = self._match_id(url)
stream_page = self._download_webpage(url, channel_id)
- if 'This channel does not exist.' in stream_page:
- raise ExtractorError('Channel does not exist', expected=True)
+ if '>This channel does not exist' in stream_page:
+ raise ExtractorError(
+ 'Channel %s does not exist' % channel_id, expected=True)
- player_settings_js = self._html_search_regex(
- r'(?s)playerSettings\[1\]\s*=\s*(\{.+?\}\n)', stream_page, 'player-settings')
- player_settings = self._parse_json(player_settings_js, channel_id,
- transform_source=js_to_json)
- if not player_settings.get('online'):
+ player = self._parse_json(
+ self._search_regex(
+ r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
+ 'player settings'),
+ channel_id, transform_source=js_to_json)
+
+ if player.get('online') is False:
raise ExtractorError('Stream is offline', expected=True)
- cdn_data = self._download_json('https://picarto.tv/process/channel', channel_id,
+ cdn_data = self._download_json(
+ 'https://picarto.tv/process/channel', channel_id,
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
- note='Fetching load balancer info')
- edge = [edge['ep'] for edge in cdn_data['edges'] if edge['id'] == cdn_data['preferedEdge']][0]
+ note='Downloading load balancing info')
+
+ def get_event(key):
+ return try_get(player, lambda x: x['event'][key], compat_str) or ''
- formats = self._extract_m3u8_formats('https://%s/hls/%s/index.m3u8' % (edge, channel_id),
- channel_id, 'mp4')
- formats.append({'url': 'https://%s/mp4/%s.mp4' % (edge, channel_id)})
+ params = {
+ 'token': player.get('token') or '',
+ 'ticket': get_event('ticket'),
+ 'con': int(time.time() * 1000),
+ 'type': get_event('ticket'),
+ 'scope': get_event('scope'),
+ }
+
+ prefered_edge = cdn_data.get('preferedEdge')
+ default_tech = player.get('defaultTech')
+
+ formats = []
+
+ for edge in cdn_data['edges']:
+ edge_ep = edge.get('ep')
+ if not edge_ep or not isinstance(edge_ep, compat_str):
+ continue
+ edge_id = edge.get('id')
+ for tech in cdn_data['techs']:
+ tech_label = tech.get('label')
+ tech_type = tech.get('type')
+ preference = 0
+ if edge_id == prefered_edge:
+ preference += 1
+ if tech_type == default_tech:
+ preference += 1
+ format_id = []
+ if edge_id:
+ format_id.append(edge_id)
+ if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
+ format_id.append('hls')
+ formats.extend(self._extract_m3u8_formats(
+ update_url_query(
+ 'https://%s/hls/%s/index.m3u8'
+ % (edge_ep, channel_id), params),
+ channel_id, 'mp4', preference=preference,
+ m3u8_id='-'.join(format_id), fatal=False))
+ continue
+ elif tech_type == 'video/mp4' or tech_label == 'MP4':
+ format_id.append('mp4')
+ formats.append({
+ 'url': update_url_query(
+ 'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
+ params),
+ 'format_id': '-'.join(format_id),
+ 'preference': preference,
+ })
+ else:
+ # rtmp format does not seem to work
+ continue
self._sort_formats(formats)
+ mature = player.get('mature')
+ if mature is None:
+ age_limit = None
+ else:
+ age_limit = 18 if mature is True else 0
+
return {
'id': channel_id,
- 'formats': formats,
- 'ext': 'mp4',
'title': self._live_title(channel_id),
'is_live': True,
- 'thumbnail': player_settings.get('vodThumb'),
- 'age_limit': 18 if player_settings.get('mature') else None,
+ 'thumbnail': player.get('vodThumb'),
+ 'age_limit': age_limit,
+ 'formats': formats,
}
class PicartoVodIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[a-zA-Z0-9_\-\.]+).flv'
- _TEST = {
- 'url': 'https://picarto.tv/videopopout/Carrot_2018.01.11.07.55.12.flv',
- 'md5': '80765b67813053ff31d4df2bd5e900ce',
+ _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
+ 'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
'info_dict': {
- 'id': 'Carrot_2018.01.11.07.55.12',
+ 'id': 'ArtofZod_2017.12.12.00.13.23.flv',
'ext': 'mp4',
- 'title': 'Carrot_2018.01.11.07.55.12',
- 'thumbnail': r're:^https?://.*\.jpg$'
- }
- }
+ 'title': 'ArtofZod_2017.12.12.00.13.23.flv',
+ 'thumbnail': r're:^https?://.*\.jpg'
+ },
+ }, {
+ 'url': 'https://picarto.tv/videopopout/Plague',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
+
webpage = self._download_webpage(url, video_id)
- vod_info_js = self._html_search_regex(r'(?s)"#vod-player",\s*(\{.+?\})\)',
- webpage, video_id)
- vod_info = self._parse_json(vod_info_js, video_id, transform_source=js_to_json)
+ vod_info = self._parse_json(
+ self._search_regex(
+ r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
+ video_id),
+ video_id, transform_source=js_to_json)
+
+ formats = self._extract_m3u8_formats(
+ vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
return {
'id': video_id,
'title': video_id,
- 'ext': 'mp4',
- 'protocol': 'm3u8',
- 'url': vod_info['vod'],
'thumbnail': vod_info.get('vodThumb'),
+ 'formats': formats,
}