diff options
author | Daniel Höpfl <daniel@hoepfl.de> | 2019-02-13 16:29:43 +0100 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2019-05-23 03:06:07 +0700 |
commit | 0e6f914b3b40ef2ca78d82051a194faaad64dd9e (patch) | |
tree | d1d040fed1402a399e7eaa43e6ea690950b03f05 /youtube_dl | |
parent | 6ab30ff50bf6bd0585927cb73c7421bef184f87a (diff) |
[vivo] Fix extraction (closes #18906)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/shared.py | 18 |
1 files changed, 16 insertions, 2 deletions
```diff
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py
index 931a0f70e..eade8fd9e 100644
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import compat_b64decode
 from ..utils import (
@@ -7,6 +9,7 @@ from ..utils import (
     int_or_none,
     url_or_none,
     urlencode_postdata,
+    unescapeHTML,
 )
 
 
@@ -22,8 +25,7 @@ class SharedBaseIE(InfoExtractor):
 
         video_url = self._extract_video_url(webpage, video_id, url)
 
-        title = compat_b64decode(self._html_search_meta(
-            'full:title', webpage, 'title')).decode('utf-8')
+        title = self._extract_title(webpage)
         filesize = int_or_none(self._html_search_meta(
             'full:size', webpage, 'file size', fatal=False))
 
@@ -35,6 +37,10 @@ class SharedBaseIE(InfoExtractor):
             'title': title,
         }
 
+    def _extract_title(self, webpage):
+        return compat_b64decode(self._html_search_meta(
+            'full:title', webpage, 'title')).decode('utf-8')
+
 
 class SharedIE(SharedBaseIE):
     IE_DESC = 'shared.sx'
@@ -86,6 +92,14 @@ class VivoIE(SharedBaseIE):
         },
     }
 
+    def _extract_title(self, webpage):
+        data_title = self._search_regex(
+            r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
+            'title', default=None, group='title')
+        if data_title:
+            return unescapeHTML(re.sub(r"\.[a-z0-9]{3,4}$", "", data_title))
+        return self._og_search_title(webpage)
+
     def _extract_video_url(self, webpage, video_id, *args):
         def decode_url(encoded_url):
             return compat_b64decode(encoded_url).decode('utf-8')
```