import datetime import json import re from .common import InfoExtractor from ..utils import ( ExtractorError, ) class YahooIE(InfoExtractor): """Information extractor for screen.yahoo.com.""" _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P\d*?)\.html' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) m_id = re.search(r'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P.+?)";', webpage) if m_id is None: # TODO: Check which url parameters are required info_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id webpage = self._download_webpage(info_url, video_id, u'Downloading info webpage') info_re = r'''<!\[CDATA\[(?P<title>.*?)\]\]>.* .*?)\]\]>.* .*?)\ .*\]\]>.*