diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-09-11 22:50:36 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-09-11 22:50:36 +0700 |
commit | 6599c72527ca8434589c010c48164494ab4c2469 (patch) | |
tree | 890af1d200cfa1c5ea9eb0f0c4e723835c08f2e7 | |
parent | 6bb05b32a990b8fb961971fcb8110d292cf953e7 (diff) |
[tube8] Extract categories and tags (Closes #10579)
-rw-r--r-- | youtube_dl/extractor/tube8.py | 21 |
1 files changed, 21 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py
index 4053f6c21..e937b2396 100644
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from ..utils import (
     int_or_none,
     str_to_int,
@@ -21,7 +23,13 @@ class Tube8IE(KeezMoviesIE):
             'title': 'Kasia music video',
             'age_limit': 18,
             'duration': 230,
+            'categories': ['Teen'],
+            'tags': ['dancing'],
+        },
+        'params': {
+            'proxy': '127.0.0.1:8118',
         }
+
     }, {
         'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
         'only_matching': True,
@@ -51,6 +59,17 @@ class Tube8IE(KeezMoviesIE):
             r'<span id="allCommentsCount">(\d+)</span>',
             webpage, 'comment count', fatal=False))
 
+        category = self._search_regex(
+            r'Category:\s*</strong>\s*<a[^>]+href=[^>]+>([^<]+)',
+            webpage, 'category', fatal=False)
+        categories = [category] if category else None
+
+        tags_str = self._search_regex(
+            r'(?s)Tags:\s*</strong>(.+?)</(?!a)',
+            webpage, 'tags', fatal=False)
+        tags = [t for t in re.findall(
+            r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None
+
         info.update({
             'description': description,
             'uploader': uploader,
@@ -58,6 +77,8 @@ class Tube8IE(KeezMoviesIE):
             'like_count': like_count,
             'dislike_count': dislike_count,
             'comment_count': comment_count,
+            'categories': categories,
+            'tags': tags,
         })
 
         return info
```