diff options
| -rw-r--r-- | ChangeLog | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/tudou.py | 132 | ||||
| -rw-r--r-- | youtube_dl/extractor/youku.py | 16 | 
4 files changed, 15 insertions, 139 deletions
| @@ -5,6 +5,7 @@ Core  * [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)  Extractors +* [tudou] Merge into youku extractor (#12214)  * [youku:show] Fix extraction  * [youku] Fix extraction (#13191) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ed603eb29..558c910d4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1019,11 +1019,6 @@ from .trilulilu import TriluliluIE  from .trutv import TruTVIE  from .tube8 import Tube8IE  from .tubitv import TubiTvIE -from .tudou import ( -    TudouIE, -    TudouPlaylistIE, -    TudouAlbumIE, -)  from .tumblr import TumblrIE  from .tunein import (      TuneInClipIE, diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 2aae55e7e..7421378a8 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -3,138 +3,6 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( -    ExtractorError, -    int_or_none, -    InAdvancePagedList, -    float_or_none, -    unescapeHTML, -) - - -class TudouIE(InfoExtractor): -    IE_NAME = 'tudou' -    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P<id>[\w-]{11})' -    _TESTS = [{ -        'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', -        'md5': '140a49ed444bd22f93330985d8475fcb', -        'info_dict': { -            'id': '159448201', -            'ext': 'f4v', -            'title': '卡马乔国足开大脚长传冲吊集锦', -            'thumbnail': r're:^https?://.*\.jpg$', -            'timestamp': 1372113489000, -            'description': '卡马乔卡家军,开大脚先进战术不完全集锦!', -            'duration': 289.04, -            'view_count': int, -            'filesize': int, -        } -    }, { -        'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/', -        'info_dict': { -            'id': '117049447', -            'ext': 'f4v', -            'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', -            'thumbnail': r're:^https?://.*\.jpg$', -            'timestamp': 1349207518000, -            'description': 'md5:294612423894260f2dcd5c6c04fe248b', -            'duration': 5478.33, -            'view_count': int, -            'filesize': int, -        } -    }] - -    _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' - -    # Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf -    # 0001, 0002 and 4001 are not included as they indicate temporary issues -    TVC_ERRORS = { -        '0003': 'The video is deleted or does not exist', -        '1001': 'This video is unavailable due to licensing issues', -        '1002': 'This video is unavailable as it\'s under review', -        '1003': 'This video is unavailable as it\'s under review', -        '3001': 'Password required', -        '5001': 'This video is available in Mainland China only due to licensing issues', -        '7001': 'This video is unavailable', -        '8001': 'This video is unavailable due to licensing issues', -    } - -    def _url_for_id(self, video_id, quality=None): -        info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id) -        if quality: -            info_url += '&hd' + quality -        xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page') -        error = xml_data.attrib.get('error') -        if error is not None: -            raise ExtractorError('Tudou said: %s' % error, expected=True) -        final_url = xml_data.text -        return final_url - -    def _real_extract(self, url): -        video_id = self._match_id(url) -        item_data = self._download_json( -            'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id, video_id) - -        youku_vcode = item_data.get('vcode') -        if youku_vcode: -            return self.url_result('youku:' + youku_vcode, ie='Youku') - -        if not item_data.get('itemSegs'): -            tvc_code = item_data.get('tvcCode') -            if tvc_code: -                err_msg = self.TVC_ERRORS.get(tvc_code) -                if err_msg: -                    raise ExtractorError('Tudou said: %s' % err_msg, expected=True) -                raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code) -            raise ExtractorError('Unxpected error returned from Tudou') - -        title = unescapeHTML(item_data['kw']) -        description = item_data.get('desc') -        thumbnail_url = item_data.get('pic') -        view_count = int_or_none(item_data.get('playTimes')) -        timestamp = int_or_none(item_data.get('pt')) - -        segments = self._parse_json(item_data['itemSegs'], video_id) -        # It looks like the keys are the arguments that have to be passed as -        # the hd field in the request url, we pick the higher -        # Also, filter non-number qualities (see issue #3643). -        quality = sorted(filter(lambda k: k.isdigit(), segments.keys()), -                         key=lambda k: int(k))[-1] -        parts = segments[quality] -        len_parts = len(parts) -        if len_parts > 1: -            self.to_screen('%s: found %s parts' % (video_id, len_parts)) - -        def part_func(partnum): -            part = parts[partnum] -            part_id = part['k'] -            final_url = self._url_for_id(part_id, quality) -            ext = (final_url.split('?')[0]).split('.')[-1] -            return [{ -                'id': '%s' % part_id, -                'url': final_url, -                'ext': ext, -                'title': title, -                'thumbnail': thumbnail_url, -                'description': description, -                'view_count': view_count, -                'timestamp': timestamp, -                'duration': float_or_none(part.get('seconds'), 1000), -                'filesize': int_or_none(part.get('size')), -                'http_headers': { -                    'Referer': self._PLAYER_URL, -                }, -            }] - -        entries = InAdvancePagedList(part_func, len_parts, 1) - -        return { -            '_type': 'multi_video', -            'entries': entries, -            'id': video_id, -            'title': title, -        }  class TudouPlaylistIE(InfoExtractor): diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index e081cfc19..ab97466d2 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -22,7 +22,9 @@ class YoukuIE(InfoExtractor):      IE_DESC = '优酷'      _VALID_URL = r'''(?x)          (?: -            http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| +            https?://( +                (?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| +                video\.tudou\.com/v/)|              youku:)          (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)      ''' @@ -71,6 +73,16 @@ class YoukuIE(InfoExtractor):              'ext': 'mp4',              'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',          }, +    }, { +        'url': 'http://video.tudou.com/v/XMjIyNzAzMTQ4NA==.html?f=46177805', +        'info_dict': { +            'id': 'XMjIyNzAzMTQ4NA', +            'ext': 'mp4', +            'title': '卡马乔国足开大脚长传冲吊集锦', +        }, +    }, { +        'url': 'http://video.tudou.com/v/XMjE4ODI3OTg2MA==.html', +        'only_matching': True,      }]      @staticmethod @@ -107,7 +119,7 @@ class YoukuIE(InfoExtractor):          # request basic data          basic_data_params = {              'vid': video_id, -            'ccode': '0401', +            'ccode': '0402' if 'tudou.com' in url else '0401',              'client_ip': '192.168.1.1',              'utid': cna,              'client_ts': time.time() / 1000, | 
