diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2017-05-12 18:44:10 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2017-05-12 18:44:24 +0800 |
commit | 7274f3d0e9e7d76d00fa3d438df1e1dc43143435 (patch) | |
tree | 09021ea65abaea9f0f194a603d783dd84874800e | |
parent | 3166b1f0ac16769190f6ab1e7e5b0e45bb63d482 (diff) |
[douyu] Support Douyu shows (closes #12228)
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/douyutv.py | 82 | ||||
-rw-r--r-- | youtube_dl/extractor/extractors.py | 5 |
3 files changed, 92 insertions, 1 deletions
@@ -1,3 +1,9 @@ +version <unreleased> + +Extractor ++ [douyu] Support Douyu shows (#12228) + + version 2017.05.09 Core diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index d22133d24..9757f4422 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -3,11 +3,14 @@ from __future__ import unicode_literals import time import hashlib +import re from .common import InfoExtractor from ..utils import ( ExtractorError, unescapeHTML, + unified_strdate, + urljoin, ) @@ -117,3 +120,82 @@ class DouyuTVIE(InfoExtractor): 'uploader': uploader, 'is_live': True, } + + +class DouyuShowIE(InfoExtractor): + _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)' + + _TESTS = [{ + 'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw', + 'md5': '0c2cfd068ee2afe657801269b2d86214', + 'info_dict': { + 'id': 'rjNBdvnVXNzvE2yw', + 'ext': 'mp4', + 'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场', + 'duration': 7150.08, + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': '陈一发儿', + 'uploader_id': 'XrZwYelr5wbK', + 'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK', + 'upload_date': '20170402', + }, + }, { + 'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw', + 'only_matching': True, + }] + + def _real_extract(self, url): + url = url.replace('vmobile.', 'v.') + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + room_info = self._parse_json(self._search_regex( + r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id) + + video_info = None + + for trial in range(5): + # Sometimes Douyu rejects our request. Let's try it more times + try: + video_info = self._download_json( + 'https://vmobile.douyu.com/video/getInfo', video_id, + query={'vid': video_id}, + headers={ + 'Referer': url, + 'x-requested-with': 'XMLHttpRequest', + }) + break + except ExtractorError: + self._sleep(1, video_id) + + if not video_info: + raise ExtractorError('Can\'t fetch video info') + + formats = self._extract_m3u8_formats( + video_info['data']['video_url'], video_id, + entry_protocol='m3u8_native', ext='mp4') + + upload_date = unified_strdate(self._html_search_regex( + r'<em>上传时间:</em><span>([^<]+)</span>', webpage, + 'upload date', fatal=False)) + + uploader = uploader_id = uploader_url = None + mobj = re.search( + r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"', + webpage) + if mobj: + uploader_id, uploader = mobj.groups() + uploader_url = urljoin(url, '/author/' + uploader_id) + + return { + 'id': video_id, + 'title': room_info['name'], + 'formats': formats, + 'duration': room_info.get('duration'), + 'thumbnail': room_info.get('pic'), + 'upload_date': upload_date, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'uploader_url': uploader_url, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 61a6f1013..510c35fcb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -251,7 +251,10 @@ from .democracynow import DemocracynowIE from .dfb import DFBIE from .dhm import DHMIE from .dotsub import DotsubIE -from .douyutv import DouyuTVIE +from .douyutv import ( + DouyuShowIE, + DouyuTVIE, +) from .dplay import ( DPlayIE, DPlayItIE, |