diff options
| -rw-r--r-- | youtube_dl/extractor/tudou.py | 36 | 
1 files changed, 25 insertions, 11 deletions
| diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 1405b73f7..79679a14a 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -7,15 +7,25 @@ from .common import InfoExtractor  class TudouIE(InfoExtractor): -    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?' -    _TEST = { +    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?' +    _TESTS = [{          u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',          u'file': u'159448201.f4v',          u'md5': u'140a49ed444bd22f93330985d8475fcb',          u'info_dict': {              u"title": u"卡马乔国足开大脚长传冲吊集锦"          } -    } +    }, +    { +        u'url': u'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html', +        u'file': u'todo.mp4', +        u'md5': u'todo.mp4', +        u'info_dict': { +            u'title': u'todo.mp4', +        }, +        u'add_ie': [u'Youku'], +        u'skip': u'Only works from China' +    }]      def _url_for_id(self, id, quality = None):          info_url = "http://v2.tudou.com/f?id="+str(id) @@ -29,14 +39,18 @@ class TudouIE(InfoExtractor):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group(2)          webpage = self._download_webpage(url, video_id) -        title = re.search(",kw:\"(.+)\"",webpage) -        if title is None: -            title = re.search(",kw: \'(.+)\'",webpage) -        title = title.group(1) -        thumbnail_url = re.search(",pic: \'(.+?)\'",webpage) -        if thumbnail_url is None: -            thumbnail_url = re.search(",pic:\"(.+?)\"",webpage) -        thumbnail_url = thumbnail_url.group(1) + +        m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage) +        if m and m.group(1): +            return { +                '_type': 'url', +                'url': u'youku:' + m.group(1), +                'ie_key': 'Youku' +            } + +        title = self._search_regex(r",kw:['\"](.+?)[\"']", webpage, u'title') +        thumbnail_url = self._search_regex( +            r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False)          segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')          segments = json.loads(segs_json) | 
