diff options
Diffstat (limited to 'youtube_dl/extractor/yahoo.py')
| -rw-r--r-- | youtube_dl/extractor/yahoo.py | 60 | 
1 files changed, 45 insertions, 15 deletions
| diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b8579b573..fca5ddc69 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -144,6 +144,17 @@ class YahooIE(InfoExtractor):          }, {              'url': 'https://tw.news.yahoo.com/-100120367.html',              'only_matching': True, +        }, { +            # Query result is embedded in webpage, but explicit request to video API fails with geo restriction +            'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', +            'md5': '4fbafb9c9b6f07aa8f870629f6671b35', +            'info_dict': { +                'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', +                'ext': 'mp4', +                'title': 'Communitary - Community Episode 1: Ladders', +                'description': 'md5:8fc39608213295748e1e289807838c97', +                'duration': 1646, +            },          }      ] @@ -171,6 +182,19 @@ class YahooIE(InfoExtractor):          if nbc_sports_url:              return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') +        # Query result is often embedded in webpage as JSON. Sometimes explicit requests +        # to video API results in a failure with geo restriction reason therefore using +        # embedded query result when present sounds reasonable. +        config_json = self._search_regex( +            r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)', +            webpage, 'videoplayer applet', default=None) +        if config_json: +            config = self._parse_json(config_json, display_id, fatal=False) +            if config: +                sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi') +                if sapi: +                    return self._extract_info(display_id, sapi, webpage) +          items_json = self._search_regex(              r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,              default=None) @@ -190,22 +214,10 @@ class YahooIE(InfoExtractor):              video_id = info['id']          return self._get_info(video_id, display_id, webpage) -    def _get_info(self, video_id, display_id, webpage): -        region = self._search_regex( -            r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', -            webpage, 'region', fatal=False, default='US') -        data = compat_urllib_parse.urlencode({ -            'protocol': 'http', -            'region': region, -        }) -        query_url = ( -            'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' -            '{id}?{data}'.format(id=video_id, data=data)) -        query_result = self._download_json( -            query_url, display_id, 'Downloading video info') - -        info = query_result['query']['results']['mediaObj'][0] +    def _extract_info(self, display_id, query, webpage): +        info = query['query']['results']['mediaObj'][0]          meta = info.get('meta') +        video_id = info.get('id')          if not meta:              msg = info['status'].get('msg') @@ -231,6 +243,9 @@ class YahooIE(InfoExtractor):                      'ext': 'flv',                  })              else: +                if s.get('format') == 'm3u8_playlist': +                    format_info['protocol'] = 'm3u8_native' +                    format_info['ext'] = 'mp4'                  format_url = compat_urlparse.urljoin(host, path)                  format_info['url'] = format_url              formats.append(format_info) @@ -264,6 +279,21 @@ class YahooIE(InfoExtractor):              'subtitles': subtitles,          } +    def _get_info(self, video_id, display_id, webpage): +        region = self._search_regex( +            r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', +            webpage, 'region', fatal=False, default='US') +        data = compat_urllib_parse.urlencode({ +            'protocol': 'http', +            'region': region, +        }) +        query_url = ( +            'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' +            '{id}?{data}'.format(id=video_id, data=data)) +        query_result = self._download_json( +            query_url, display_id, 'Downloading video info') +        return self._extract_info(display_id, query_result, webpage) +  class YahooSearchIE(SearchInfoExtractor):      IE_DESC = 'Yahoo screen search' | 
