diff options
Diffstat (limited to 'youtube_dl/extractor/itv.py')
| -rw-r--r-- | youtube_dl/extractor/itv.py | 126 | 
1 files changed, 69 insertions, 57 deletions
| diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index 6a4f8a505..40cffed46 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -18,6 +18,7 @@ from ..utils import (      xpath_element,      xpath_text,      int_or_none, +    merge_dicts,      parse_duration,      smuggle_url,      ExtractorError, @@ -129,64 +130,65 @@ class ITVIE(InfoExtractor):          resp_env = self._download_xml(              params['data-playlist-url'], video_id, -            headers=headers, data=etree.tostring(req_env)) -        playlist = xpath_element(resp_env, './/Playlist') -        if playlist is None: -            fault_code = xpath_text(resp_env, './/faultcode') -            fault_string = xpath_text(resp_env, './/faultstring') -            if fault_code == 'InvalidGeoRegion': -                self.raise_geo_restricted( -                    msg=fault_string, countries=self._GEO_COUNTRIES) -            elif fault_code not in ( -                    'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'): -                raise ExtractorError( -                    '%s said: %s' % (self.IE_NAME, fault_string), expected=True) -            info.update({ -                'title': self._og_search_title(webpage), -                'episode_title': params.get('data-video-episode'), -                'series': params.get('data-video-title'), -            }) -        else: -            title = xpath_text(playlist, 'EpisodeTitle', default=None) -            info.update({ -                'title': title, -                'episode_title': title, -                'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), -                'series': xpath_text(playlist, 'ProgrammeTitle'), -                'duration': parse_duration(xpath_text(playlist, 'Duration')), -            }) -            video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) -            media_files = xpath_element(video_element, 'MediaFiles', fatal=True) -            rtmp_url = media_files.attrib['base'] +            headers=headers, data=etree.tostring(req_env), fatal=False) +        if resp_env: +            playlist = xpath_element(resp_env, './/Playlist') +            if playlist is None: +                fault_code = xpath_text(resp_env, './/faultcode') +                fault_string = xpath_text(resp_env, './/faultstring') +                if fault_code == 'InvalidGeoRegion': +                    self.raise_geo_restricted( +                        msg=fault_string, countries=self._GEO_COUNTRIES) +                elif fault_code not in ( +                        'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'): +                    raise ExtractorError( +                        '%s said: %s' % (self.IE_NAME, fault_string), expected=True) +                info.update({ +                    'title': self._og_search_title(webpage), +                    'episode_title': params.get('data-video-episode'), +                    'series': params.get('data-video-title'), +                }) +            else: +                title = xpath_text(playlist, 'EpisodeTitle', default=None) +                info.update({ +                    'title': title, +                    'episode_title': title, +                    'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), +                    'series': xpath_text(playlist, 'ProgrammeTitle'), +                    'duration': parse_duration(xpath_text(playlist, 'Duration')), +                }) +                video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) +                media_files = xpath_element(video_element, 'MediaFiles', fatal=True) +                rtmp_url = media_files.attrib['base'] -            for media_file in media_files.findall('MediaFile'): -                play_path = xpath_text(media_file, 'URL') -                if not play_path: -                    continue -                tbr = int_or_none(media_file.get('bitrate'), 1000) -                f = { -                    'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), -                    'play_path': play_path, -                    # Providing this swfVfy allows to avoid truncated downloads -                    'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', -                    'page_url': url, -                    'tbr': tbr, -                    'ext': 'flv', -                } -                app = self._search_regex( -                    'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) -                if app: -                    f.update({ -                        'url': rtmp_url.split('?', 1)[0], -                        'app': app, -                    }) -                else: -                    f['url'] = rtmp_url -                formats.append(f) +                for media_file in media_files.findall('MediaFile'): +                    play_path = xpath_text(media_file, 'URL') +                    if not play_path: +                        continue +                    tbr = int_or_none(media_file.get('bitrate'), 1000) +                    f = { +                        'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), +                        'play_path': play_path, +                        # Providing this swfVfy allows to avoid truncated downloads +                        'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', +                        'page_url': url, +                        'tbr': tbr, +                        'ext': 'flv', +                    } +                    app = self._search_regex( +                        'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) +                    if app: +                        f.update({ +                            'url': rtmp_url.split('?', 1)[0], +                            'app': app, +                        }) +                    else: +                        f['url'] = rtmp_url +                    formats.append(f) -            for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): -                if caption_url.text: -                    extract_subtitle(caption_url.text) +                for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): +                    if caption_url.text: +                        extract_subtitle(caption_url.text)          ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')          hmac = params.get('data-video-hmac') @@ -261,7 +263,17 @@ class ITVIE(InfoExtractor):              'formats': formats,              'subtitles': subtitles,          }) -        return info + +        webpage_info = self._search_json_ld(webpage, video_id, default={}) +        if not webpage_info.get('title'): +            webpage_info['title'] = self._html_search_regex( +                r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<', +                webpage, 'title', default=None) or self._og_search_title( +                webpage, default=None) or self._html_search_meta( +                'twitter:title', webpage, 'title', +                default=None) or webpage_info['episode'] + +        return merge_dicts(info, webpage_info)  class ITVBTCCIE(InfoExtractor): | 
