diff options
| author | Sergey M․ <dstftw@gmail.com> | 2018-04-16 00:31:25 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2018-04-16 00:34:47 +0700 | 
| commit | a42839e548d81ae20e5164ae690075d2c423477e (patch) | |
| tree | 419ae879ea2c7d771205b5fcb0edd7408ef3a674 | |
| parent | d6166a7602f5b78a4bb552ba0f4b176cbc0a4a03 (diff) | |
[picarto] Improve extraction (closes #6205, closes #12514, closes #15276, closes #15551)
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 2 |
| -rwxr-xr-x | youtube_dl/extractor/picarto.py | 152 |
2 files changed, 116 insertions, 38 deletions
| diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d83e93dec..3570fa165 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -816,8 +816,8 @@ from .philharmoniedeparis import PhilharmonieDeParisIE  from .phoenix import PhoenixIE  from .photobucket import PhotobucketIE  from .picarto import ( -    PicartoVodIE,      PicartoIE, +    PicartoVodIE,  )  from .piksel import PikselIE  from .pinkbike import PinkbikeIE diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py index 1d6f714ed..2366dfb34 100755 --- a/youtube_dl/extractor/picarto.py +++ b/youtube_dl/extractor/picarto.py @@ -1,12 +1,21 @@  # coding: utf-8  from __future__ import unicode_literals +import time +  from .common import InfoExtractor -from ..utils import ExtractorError, js_to_json, urlencode_postdata +from ..compat import compat_str +from ..utils import ( +    ExtractorError, +    js_to_json, +    try_get, +    update_url_query, +    urlencode_postdata, +)  class PicartoIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)[^/]*$' +    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'      _TEST = {          'url': 'https://picarto.tv/Setz',          'info_dict': { @@ -16,72 +25,141 @@ class PicartoIE(InfoExtractor):              'timestamp': int,              'is_live': True          }, -        'params': { -            'skip_download': True -        } +        'skip': 'Stream is offline',      } +    @classmethod +    def suitable(cls, url): +        return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url) +      def _real_extract(self, url):          channel_id = self._match_id(url)          stream_page = self._download_webpage(url, channel_id) -        if 'This channel does not exist.' 
in stream_page: -            raise ExtractorError('Channel does not exist', expected=True) +        if '>This channel does not exist' in stream_page: +            raise ExtractorError( +                'Channel %s does not exist' % channel_id, expected=True) -        player_settings_js = self._html_search_regex( -            r'(?s)playerSettings\[1\]\s*=\s*(\{.+?\}\n)', stream_page, 'player-settings') -        player_settings = self._parse_json(player_settings_js, channel_id, -            transform_source=js_to_json) -        if not player_settings.get('online'): +        player = self._parse_json( +            self._search_regex( +                r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page, +                'player settings'), +            channel_id, transform_source=js_to_json) + +        if player.get('online') is False:              raise ExtractorError('Stream is offline', expected=True) -        cdn_data = self._download_json('https://picarto.tv/process/channel', channel_id, +        cdn_data = self._download_json( +            'https://picarto.tv/process/channel', channel_id,              data=urlencode_postdata({'loadbalancinginfo': channel_id}), -            note='Fetching load balancer info') -        edge = [edge['ep'] for edge in cdn_data['edges'] if edge['id'] == cdn_data['preferedEdge']][0] +            note='Downloading load balancing info') + +        def get_event(key): +            return try_get(player, lambda x: x['event'][key], compat_str) or '' -        formats = self._extract_m3u8_formats('https://%s/hls/%s/index.m3u8' % (edge, channel_id), -            channel_id, 'mp4') -        formats.append({'url': 'https://%s/mp4/%s.mp4' % (edge, channel_id)}) +        params = { +            'token': player.get('token') or '', +            'ticket': get_event('ticket'), +            'con': int(time.time() * 1000), +            'type': get_event('ticket'), +            'scope': get_event('scope'), +        } + +        prefered_edge 
= cdn_data.get('preferedEdge') +        default_tech = player.get('defaultTech') + +        formats = [] + +        for edge in cdn_data['edges']: +            edge_ep = edge.get('ep') +            if not edge_ep or not isinstance(edge_ep, compat_str): +                continue +            edge_id = edge.get('id') +            for tech in cdn_data['techs']: +                tech_label = tech.get('label') +                tech_type = tech.get('type') +                preference = 0 +                if edge_id == prefered_edge: +                    preference += 1 +                if tech_type == default_tech: +                    preference += 1 +                format_id = [] +                if edge_id: +                    format_id.append(edge_id) +                if tech_type == 'application/x-mpegurl' or tech_label == 'HLS': +                    format_id.append('hls') +                    formats.extend(self._extract_m3u8_formats( +                        update_url_query( +                            'https://%s/hls/%s/index.m3u8' +                            % (edge_ep, channel_id), params), +                        channel_id, 'mp4', preference=preference, +                        m3u8_id='-'.join(format_id), fatal=False)) +                    continue +                elif tech_type == 'video/mp4' or tech_label == 'MP4': +                    format_id.append('mp4') +                    formats.append({ +                        'url': update_url_query( +                            'https://%s/mp4/%s.mp4' % (edge_ep, channel_id), +                            params), +                        'format_id': '-'.join(format_id), +                        'preference': preference, +                    }) +                else: +                    # rtmp format does not seem to work +                    continue          self._sort_formats(formats) +        mature = player.get('mature') +        if mature is None: +            age_limit = None +        else: +   
         age_limit = 18 if mature is True else 0 +          return {              'id': channel_id, -            'formats': formats, -            'ext': 'mp4',              'title': self._live_title(channel_id),              'is_live': True, -            'thumbnail': player_settings.get('vodThumb'), -            'age_limit': 18 if player_settings.get('mature') else None, +            'thumbnail': player.get('vodThumb'), +            'age_limit': age_limit, +            'formats': formats,          }  class PicartoVodIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[a-zA-Z0-9_\-\.]+).flv' -    _TEST = { -        'url': 'https://picarto.tv/videopopout/Carrot_2018.01.11.07.55.12.flv', -        'md5': '80765b67813053ff31d4df2bd5e900ce', +    _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)' +    _TESTS = [{ +        'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv', +        'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',          'info_dict': { -            'id': 'Carrot_2018.01.11.07.55.12', +            'id': 'ArtofZod_2017.12.12.00.13.23.flv',              'ext': 'mp4', -            'title': 'Carrot_2018.01.11.07.55.12', -            'thumbnail': r're:^https?://.*\.jpg$' -        } -    } +            'title': 'ArtofZod_2017.12.12.00.13.23.flv', +            'thumbnail': r're:^https?://.*\.jpg' +        }, +    }, { +        'url': 'https://picarto.tv/videopopout/Plague', +        'only_matching': True, +    }]      def _real_extract(self, url):          video_id = self._match_id(url) +          webpage = self._download_webpage(url, video_id) -        vod_info_js = self._html_search_regex(r'(?s)"#vod-player",\s*(\{.+?\})\)', -            webpage, video_id) -        vod_info = self._parse_json(vod_info_js, video_id, transform_source=js_to_json) +        vod_info = self._parse_json( +            self._search_regex( +                r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage, 
+                video_id), +            video_id, transform_source=js_to_json) + +        formats = self._extract_m3u8_formats( +            vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native', +            m3u8_id='hls') +        self._sort_formats(formats)          return {              'id': video_id,              'title': video_id, -            'ext': 'mp4', -            'protocol': 'm3u8', -            'url': vod_info['vod'],              'thumbnail': vod_info.get('vodThumb'), +            'formats': formats,          } | 
