diff options
Diffstat (limited to 'youtube_dl/extractor/prosiebensat1.py')
| -rw-r--r-- | youtube_dl/extractor/prosiebensat1.py | 165 | 
1 files changed, 88 insertions, 77 deletions
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 07d49d489..ad0f0031c 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -5,7 +5,7 @@ import re  from hashlib import sha1  from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode +from ..compat import compat_str  from ..utils import (      ExtractorError,      determine_ext, @@ -71,6 +71,7 @@ class ProSiebenSat1IE(InfoExtractor):                  # rtmp download                  'skip_download': True,              }, +            'skip': 'This video is unavailable',          },          {              'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip', @@ -86,6 +87,7 @@ class ProSiebenSat1IE(InfoExtractor):                  # rtmp download                  'skip_download': True,              }, +            'skip': 'This video is unavailable',          },          {              'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip', @@ -101,6 +103,7 @@ class ProSiebenSat1IE(InfoExtractor):                  # rtmp download                  'skip_download': True,              }, +            'skip': 'This video is unavailable',          },          {              'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge', @@ -116,6 +119,7 @@ class ProSiebenSat1IE(InfoExtractor):                  # rtmp download                  'skip_download': True,              }, +            'skip': 'This video is unavailable',          },          {              'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge', @@ -131,6 +135,7 @@ class ProSiebenSat1IE(InfoExtractor):                  # rtmp download                  'skip_download': True,              }, +            'skip': 'This video is unavailable',          },          {              'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', @@ -227,70 +232,42 @@ class ProSiebenSat1IE(InfoExtractor):      ]      def _extract_clip(self, url, webpage): -        clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id') +        clip_id = self._html_search_regex( +            self._CLIPID_REGEXES, webpage, 'clip id')          access_token = 'prosieben'          client_name = 'kolibri-2.0.19-splec4'          client_location = url -        videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse_urlencode({ -            'access_token': access_token, -            'client_location': client_location, -            'client_name': client_name, -            'ids': clip_id, -        }) - -        video = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')[0] +        video = self._download_json( +            'http://vas.sim-technik.de/vas/live/v2/videos', +            clip_id, 'Downloading videos JSON', query={ +                'access_token': access_token, +                'client_location': client_location, +                'client_name': client_name, +                'ids': clip_id, +            })[0]          if video.get('is_protected') is True:              raise ExtractorError('This video is DRM protected.', expected=True)          duration = float_or_none(video.get('duration')) -        source_ids = [source['id'] for source in video['sources']] -        source_ids_str = ','.join(map(str, source_ids)) +        source_ids = [compat_str(source['id']) for source in video['sources']]          g = '01!8d8F_)r9]4s[qeuXfP%' +        client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]).encode('utf-8')).hexdigest() -        client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]) -                                 .encode('utf-8')).hexdigest() - -        sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse_urlencode({ -            'access_token': access_token, -            'client_id': client_id, -            'client_location': client_location, -            'client_name': client_name, -        })) - -        sources = self._download_json(sources_api_url, clip_id, 'Downloading sources JSON') +        sources = self._download_json( +            'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, +            clip_id, 'Downloading sources JSON', query={ +                'access_token': access_token, +                'client_id': client_id, +                'client_location': client_location, +                'client_name': client_name, +            })          server_id = sources['server_id'] -        client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, -                                          client_location, source_ids_str, g, client_name]) -                                 .encode('utf-8')).hexdigest() - -        url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse_urlencode({ -            'access_token': access_token, -            'client_id': client_id, -            'client_location': client_location, -            'client_name': client_name, -            'server_id': server_id, -            'source_ids': source_ids_str, -        })) - -        urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON') -          title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title') -        description = self._html_search_regex(self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) -        thumbnail = self._og_search_thumbnail(webpage) - -        upload_date = unified_strdate(self._html_search_regex( -            self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) - -        formats = [] - -        urls_sources = urls['sources'] -        if isinstance(urls_sources, dict): -            urls_sources = urls_sources.values()          def fix_bitrate(bitrate):              bitrate = int_or_none(bitrate) @@ -298,37 +275,71 @@ class ProSiebenSat1IE(InfoExtractor):                  return None              return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate -        for source in urls_sources: -            protocol = source['protocol'] -            source_url = source['url'] -            if protocol == 'rtmp' or protocol == 'rtmpe': -                mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url) -                if not mobj: -                    continue -                path = mobj.group('path') -                mp4colon_index = path.rfind('mp4:') -                app = path[:mp4colon_index] -                play_path = path[mp4colon_index:] -                formats.append({ -                    'url': '%s/%s' % (mobj.group('url'), app), -                    'app': app, -                    'play_path': play_path, -                    'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', -                    'page_url': 'http://www.prosieben.de', -                    'vbr': fix_bitrate(source['bitrate']), -                    'ext': 'mp4', -                    'format_id': '%s_%s' % (source['cdn'], source['bitrate']), -                }) -            elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': -                formats.extend(self._extract_f4m_formats(source_url, clip_id)) -            else: -                formats.append({ -                    'url': source_url, -                    'vbr': fix_bitrate(source['bitrate']), +        formats = [] +        for source_id in source_ids: +            client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, client_location, source_id, g, client_name]).encode('utf-8')).hexdigest() +            urls = self._download_json( +                'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, +                clip_id, 'Downloading urls JSON', query={ +                    'access_token': access_token, +                    'client_id': client_id, +                    'client_location': client_location, +                    'client_name': client_name, +                    'server_id': server_id, +                    'source_ids': source_id,                  }) - +            if urls.get('status_code') != 0: +                raise ExtractorError('This video is unavailable', expected=True) +            urls_sources = urls['sources'] +            if isinstance(urls_sources, dict): +                urls_sources = urls_sources.values() +            for source in urls_sources: +                source_url = source.get('url') +                if not source_url: +                    continue +                protocol = source.get('protocol') +                mimetype = source.get('mimetype') +                if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': +                    formats.extend(self._extract_f4m_formats( +                        source_url, clip_id, f4m_id='hds', fatal=False)) +                elif mimetype == 'application/x-mpegURL': +                    formats.extend(self._extract_m3u8_formats( +                        source_url, clip_id, 'mp4', 'm3u8_native', +                        m3u8_id='hls', fatal=False)) +                else: +                    tbr = fix_bitrate(source['bitrate']) +                    if protocol in ('rtmp', 'rtmpe'): +                        mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url) +                        if not mobj: +                            continue +                        path = mobj.group('path') +                        mp4colon_index = path.rfind('mp4:') +                        app = path[:mp4colon_index] +                        play_path = path[mp4colon_index:] +                        formats.append({ +                            'url': '%s/%s' % (mobj.group('url'), app), +                            'app': app, +                            'play_path': play_path, +                            'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', +                            'page_url': 'http://www.prosieben.de', +                            'tbr': tbr, +                            'ext': 'flv', +                            'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), +                        }) +                    else: +                        formats.append({ +                            'url': source_url, +                            'tbr': tbr, +                            'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), +                        })          self._sort_formats(formats) +        description = self._html_search_regex( +            self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) +        thumbnail = self._og_search_thumbnail(webpage) +        upload_date = unified_strdate(self._html_search_regex( +            self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) +          return {              'id': clip_id,              'title': title,  | 
