diff options
author | Sergey M․ <dstftw@gmail.com> | 2021-05-16 21:56:30 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2021-05-16 21:57:21 +0700 |
commit | 503a3744ad620a2fe21c82cb2c595dc939310a1e (patch) | |
tree | 784fdd683d5346faab017ab3a2a0d9c610cc38a0 | |
parent | ef03721f470606deaddb4a57e51550045a32fb04 (diff) |
[eroprofile] Fix extraction (closes #23200, closes #23626, closes #29008)
-rw-r--r-- | youtube_dl/extractor/eroprofile.py | 18 |
1 files changed, 7 insertions, 11 deletions
diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py
index c08643a17..9ee549d77 100644
--- a/youtube_dl/extractor/eroprofile.py
+++ b/youtube_dl/extractor/eroprofile.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlencode
 from ..utils import (
     ExtractorError,
-    unescapeHTML
+    merge_dicts,
 )
 
 
@@ -77,19 +77,15 @@ class EroProfileIE(InfoExtractor):
             [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
             webpage, 'video id', default=None)
 
-        video_url = unescapeHTML(self._search_regex(
-            r'<source src="([^"]+)', webpage, 'video url'))
         title = self._html_search_regex(
-            r'Title:</th><td>([^<]+)</td>', webpage, 'title')
-        thumbnail = self._search_regex(
-            r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
-            webpage, 'thumbnail', fatal=False)
+            (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
+            webpage, 'title')
 
-        return {
+        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+
+        return merge_dicts(info, {
             'id': video_id,
             'display_id': display_id,
-            'url': video_url,
             'title': title,
-            'thumbnail': thumbnail,
             'age_limit': 18,
-        }
+        })