diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-05-28 07:50:15 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-05-28 07:50:15 +0700 | 
| commit | 855520427458f68076143c4079b038f4d15e5a13 (patch) | |
| tree | ac3a63107b608b995ac8a08ac8d3bce8d71049f3 | |
| parent | 164fcbfeb75787cfeb1f0a84188ba4c11ac4a009 (diff) | |
[xhamster] Extract categories (closes #11728)
| -rw-r--r-- | youtube_dl/extractor/xhamster.py | 11 | 
1 file changed, 11 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 7b6703714..7cf0629e7 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     dict_get,
     ExtractorError,
     int_or_none,
@@ -25,6 +26,7 @@ class XHamsterIE(InfoExtractor):
             'uploader': 'Ruseful2011',
             'duration': 893,
             'age_limit': 18,
+            'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy'],
         },
     }, {
         'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
@@ -36,6 +38,7 @@ class XHamsterIE(InfoExtractor):
             'uploader': 'jojo747400',
             'duration': 200,
             'age_limit': 18,
+            'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
         },
         'params': {
             'skip_download': True,
@@ -51,6 +54,7 @@ class XHamsterIE(InfoExtractor):
             'uploader': 'parejafree',
             'duration': 72,
             'age_limit': 18,
+            'categories': ['Amateur', 'Blowjobs'],
         },
         'params': {
             'skip_download': True,
@@ -152,6 +156,12 @@ class XHamsterIE(InfoExtractor):
 
         self._sort_formats(formats)
 
+        categories_html = self._search_regex(
+            r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
+            'categories', default=None)
+        categories = [clean_html(category) for category in re.findall(
+            r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None
+
         return {
             'id': video_id,
             'title': title,
@@ -165,6 +175,7 @@ class XHamsterIE(InfoExtractor):
             'dislike_count': int_or_none(dislike_count),
             'comment_count': int_or_none(comment_count),
             'age_limit': age_limit,
+            'categories': categories,
             'formats': formats,
         }
 
```
