diff options
author | Sergey M․ <dstftw@gmail.com> | 2019-07-14 00:21:39 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2019-07-14 00:21:39 +0700 |
commit | f9eeeda31c1a643aced8283440983f3a45208840 (patch) | |
tree | 2d94889d56f646d2ce8a984da45445a67e3c9c20 | |
parent | 5f562bd4bbc780e535e187efb36659247b41d6e5 (diff) |
[spankbang] Fix and improve metadata extraction
-rw-r--r-- | youtube_dl/extractor/spankbang.py | 27 |
1 files changed, 17 insertions, 10 deletions
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
index eb0919e3a..e040ada29 100644
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    merge_dicts,
     orderedSet,
     parse_duration,
     parse_resolution,
@@ -26,6 +27,8 @@ class SpankBangIE(InfoExtractor):
             'description': 'dillion harper masturbates on a bed',
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'silly2587',
+            'timestamp': 1422571989,
+            'upload_date': '20150129',
             'age_limit': 18,
         }
     }, {
@@ -113,26 +116,29 @@ class SpankBangIE(InfoExtractor):
 
         self._sort_formats(formats)
 
+        info = self._search_json_ld(webpage, video_id, default={})
+
         title = self._html_search_regex(
-            r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
+            r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
         description = self._search_regex(
             r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
-            webpage, 'description', fatal=False)
-        thumbnail = self._og_search_thumbnail(webpage)
-        uploader = self._search_regex(
-            r'class="user"[^>]*><img[^>]+>([^<]+)',
+            webpage, 'description', default=None)
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+        uploader = self._html_search_regex(
+            (r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>',
+             r'class="user"[^>]*><img[^>]+>([^<]+)'),
             webpage, 'uploader', default=None)
         duration = parse_duration(self._search_regex(
             r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
-            webpage, 'duration', fatal=False))
+            webpage, 'duration', default=None))
         view_count = str_to_int(self._search_regex(
-            r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False))
+            r'([\d,.]+)\s+plays', webpage, 'view count', default=None))
 
         age_limit = self._rta_search(webpage)
 
-        return {
+        return merge_dicts({
             'id': video_id,
-            'title': title,
+            'title': title or video_id,
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
@@ -140,7 +146,8 @@ class SpankBangIE(InfoExtractor):
             'view_count': view_count,
             'formats': formats,
             'age_limit': age_limit,
-        }
+        }, info
+        )
 
 
 class SpankBangPlaylistIE(InfoExtractor):