about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2016-08-31 22:23:55 +0700
committer Sergey M․ <dstftw@gmail.com> 2016-08-31 22:23:55 +0700
commit 7a3e849f6eaf51b1d86b843a63664012ced2258c (patch)
tree 391e568d8b22ce46ab7b7f95998e370ef3d7a9c0
parent 196c6ba06792ec38238631d9173fc146822baa7e (diff)
download youtube-dl-7a3e849f6eaf51b1d86b843a63664012ced2258c.tar.xz
[porncom] Extract categories and tags (Closes #10510)
-rw-r--r-- youtube_dl/extractor/porncom.py 13
1 files changed, 12 insertions, 1 deletions
diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py
index 4baf79688..d85e0294d 100644
--- a/youtube_dl/extractor/porncom.py
+++ b/youtube_dl/extractor/porncom.py
@@ -26,6 +26,8 @@ class PornComIE(InfoExtractor):
'duration': 551,
'view_count': int,
'age_limit': 18,
+ 'categories': list,
+ 'tags': list,
},
}, {
'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
@@ -75,7 +77,14 @@ class PornComIE(InfoExtractor):
self._sort_formats(formats)
view_count = str_to_int(self._search_regex(
- r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, 'view count'))
+ r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage,
+ 'view count', fatal=False))
+
+ def extract_list(kind):
+ s = self._search_regex(
+ r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(),
+ webpage, kind, fatal=False)
+ return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
return {
'id': video_id,
@@ -86,4 +95,6 @@ class PornComIE(InfoExtractor):
'view_count': view_count,
'formats': formats,
'age_limit': 18,
+ 'categories': extract_list('categories'),
+ 'tags': extract_list('tags'),
}