diff options
Diffstat (limited to 'youtube_dl/extractor/douyutv.py')
| -rw-r--r-- | youtube_dl/extractor/douyutv.py | 82 | 
1 files changed, 82 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index d22133d24..9757f4422 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -3,11 +3,14 @@ from __future__ import unicode_literals  import time  import hashlib +import re  from .common import InfoExtractor  from ..utils import (      ExtractorError,      unescapeHTML, +    unified_strdate, +    urljoin,  ) @@ -117,3 +120,82 @@ class DouyuTVIE(InfoExtractor):              'uploader': uploader,              'is_live': True,          } + + +class DouyuShowIE(InfoExtractor): +    _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)' + +    _TESTS = [{ +        'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw', +        'md5': '0c2cfd068ee2afe657801269b2d86214', +        'info_dict': { +            'id': 'rjNBdvnVXNzvE2yw', +            'ext': 'mp4', +            'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场', +            'duration': 7150.08, +            'thumbnail': r're:^https?://.*\.jpg$', +            'uploader': '陈一发儿', +            'uploader_id': 'XrZwYelr5wbK', +            'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK', +            'upload_date': '20170402', +        }, +    }, { +        'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        url = url.replace('vmobile.', 'v.') +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) + +        room_info = self._parse_json(self._search_regex( +            r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id) + +        video_info = None + +        for trial in range(5): +            # Sometimes Douyu rejects our request. Let's try it more times +            try: +                video_info = self._download_json( +                    'https://vmobile.douyu.com/video/getInfo', video_id, +                    query={'vid': video_id}, +                    headers={ +                        'Referer': url, +                        'x-requested-with': 'XMLHttpRequest', +                    }) +                break +            except ExtractorError: +                self._sleep(1, video_id) + +        if not video_info: +            raise ExtractorError('Can\'t fetch video info') + +        formats = self._extract_m3u8_formats( +            video_info['data']['video_url'], video_id, +            entry_protocol='m3u8_native', ext='mp4') + +        upload_date = unified_strdate(self._html_search_regex( +            r'<em>上传时间:</em><span>([^<]+)</span>', webpage, +            'upload date', fatal=False)) + +        uploader = uploader_id = uploader_url = None +        mobj = re.search( +            r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"', +            webpage) +        if mobj: +            uploader_id, uploader = mobj.groups() +            uploader_url = urljoin(url, '/author/' + uploader_id) + +        return { +            'id': video_id, +            'title': room_info['name'], +            'formats': formats, +            'duration': room_info.get('duration'), +            'thumbnail': room_info.get('pic'), +            'upload_date': upload_date, +            'uploader': uploader, +            'uploader_id': uploader_id, +            'uploader_url': uploader_url, +        } | 
