diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-05-28 07:50:15 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-05-28 07:50:15 +0700 |
commit | 855520427458f68076143c4079b038f4d15e5a13 (patch) | |
tree | ac3a63107b608b995ac8a08ac8d3bce8d71049f3 /youtube_dl | |
parent | 164fcbfeb75787cfeb1f0a84188ba4c11ac4a009 (diff) |
[xhamster] Extract categories (closes #11728)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/xhamster.py | 11 |
1 files changed, 11 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 7b6703714..7cf0629e7 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     dict_get,
     ExtractorError,
     int_or_none,
@@ -25,6 +26,7 @@ class XHamsterIE(InfoExtractor):
             'uploader': 'Ruseful2011',
             'duration': 893,
             'age_limit': 18,
+            'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy'],
         },
     }, {
         'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
@@ -36,6 +38,7 @@ class XHamsterIE(InfoExtractor):
             'uploader': 'jojo747400',
             'duration': 200,
             'age_limit': 18,
+            'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
         },
         'params': {
             'skip_download': True,
@@ -51,6 +54,7 @@ class XHamsterIE(InfoExtractor):
             'uploader': 'parejafree',
             'duration': 72,
             'age_limit': 18,
+            'categories': ['Amateur', 'Blowjobs'],
         },
         'params': {
             'skip_download': True,
@@ -152,6 +156,12 @@ class XHamsterIE(InfoExtractor):
 
         self._sort_formats(formats)
 
+        categories_html = self._search_regex(
+            r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
+            'categories', default=None)
+        categories = [clean_html(category) for category in re.findall(
+            r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None
+
         return {
             'id': video_id,
             'title': title,
@@ -165,6 +175,7 @@ class XHamsterIE(InfoExtractor):
             'dislike_count': int_or_none(dislike_count),
             'comment_count': int_or_none(comment_count),
             'age_limit': age_limit,
+            'categories': categories,
             'formats': formats,
         }
 
```