diff options
| author | Sergey M․ <dstftw@gmail.com> | 2019-02-05 23:06:55 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2019-02-05 23:09:24 +0700 | 
| commit | 5dda1edef93d94c9a49672f905df0c49c75c5739 (patch) | |
| tree | eda87af41ddd975c3668113d8bf222785dc55373 | |
| parent | d2d970d07ec82f648b62bff8b15ac0b57d0d0496 (diff) | |
[pornhub] Improve and simplify (closes #19135)
| -rw-r--r-- | youtube_dl/extractor/pornhub.py | 17 | 
1 file changed, 6 insertions, 11 deletions
```diff
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 428324ef0..641083da7 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -302,17 +302,12 @@ class PornHubIE(PornHubBaseIE):
         comment_count = self._extract_count(
             r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
 
-        def _get_items(class_name):
+        def extract_list(meta_key):
             div = self._search_regex(
-                r'<div class="' + class_name + '">([\S\s]+?)</div>',
-                webpage, class_name, default=None)
+                r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
+                % meta_key, webpage, meta_key, default=None)
             if div:
-                return [a for a in re.findall(r'<a href=[^>]+>([^<]+)', div)]
-            else:
-                return None
-
-        categories = _get_items('categoriesWrapper')
-        tags = _get_items('tagsWrapper')
+                return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
 
         return {
             'id': video_id,
@@ -327,8 +322,8 @@ class PornHubIE(PornHubBaseIE):
             'comment_count': comment_count,
             'formats': formats,
             'age_limit': 18,
-            'tags': tags,
-            'categories': categories,
+            'tags': extract_list('tags'),
+            'categories': extract_list('categories'),
             'subtitles': subtitles,
         }
```
