diff options
Diffstat (limited to 'youtube_dl/extractor/tudou.py')
| -rw-r--r-- | youtube_dl/extractor/tudou.py | 60 | 
1 files changed, 36 insertions, 24 deletions
diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 7a3891b89..dcd823d08 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -1,5 +1,7 @@  # coding: utf-8 +from __future__ import unicode_literals +  import re  import json @@ -9,22 +11,29 @@ from .common import InfoExtractor  class TudouIE(InfoExtractor):      _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'      _TESTS = [{ -        u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', -        u'file': u'159448201.f4v', -        u'md5': u'140a49ed444bd22f93330985d8475fcb', -        u'info_dict': { -            u"title": u"卡马乔国足开大脚长传冲吊集锦" +        'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', +        'md5': '140a49ed444bd22f93330985d8475fcb', +        'info_dict': { +            'id': '159448201', +            'ext': 'f4v', +            'title': '卡马乔国足开大脚长传冲吊集锦', +            'thumbnail': 're:^https?://.*\.jpg$', +        } +    }, { +        'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/', +        'info_dict': { +            'id': '117049447', +            'ext': 'f4v', +            'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', +            'thumbnail': 're:^https?://.*\.jpg$',          } -    }, -    { -        u'url': u'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html', -        u'file': u'todo.mp4', -        u'md5': u'todo.mp4', -        u'info_dict': { -            u'title': u'todo.mp4', +    }, { +        'url': 'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html', +        'info_dict': { +            'title': 'todo.mp4',          }, -        u'add_ie': [u'Youku'], -        u'skip': u'Only works from China' +        'add_ie': ['Youku'], +        'skip': 'Only works from China'      }]      def _url_for_id(self, id, quality = None): @@ -44,20 +53,22 @@ class TudouIE(InfoExtractor):          if m and m.group(1):              return {                  '_type': 'url', -                'url': u'youku:' + m.group(1), +                'url': 'youku:' + m.group(1),                  'ie_key': 'Youku'              }          title = self._search_regex( -            r",kw:\s*['\"](.+?)[\"']", webpage, u'title') +            r",kw:\s*['\"](.+?)[\"']", webpage, 'title')          thumbnail_url = self._search_regex( -            r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False) +            r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False)          segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')          segments = json.loads(segs_json)          # It looks like the keys are the arguments that have to be passed as          # the hd field in the request url, we pick the higher -        quality = sorted(segments.keys())[-1] +        # Also, filter non-number qualities (see issue #3643). +        quality = sorted(filter(lambda k: k.isdigit(), segments.keys()), +                         key=lambda k: int(k))[-1]          parts = segments[quality]          result = []          len_parts = len(parts) @@ -67,12 +78,13 @@ class TudouIE(InfoExtractor):              part_id = part['k']              final_url = self._url_for_id(part_id, quality)              ext = (final_url.split('?')[0]).split('.')[-1] -            part_info = {'id': part_id, -                          'url': final_url, -                          'ext': ext, -                          'title': title, -                          'thumbnail': thumbnail_url, -                          } +            part_info = { +                'id': '%s' % part_id, +                'url': final_url, +                'ext': ext, +                'title': title, +                'thumbnail': thumbnail_url, +            }              result.append(part_info)          return result  | 
