diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-03-30 19:41:04 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-03-30 19:41:04 +0600 |
commit | 7700207ec7d39e1594d9963a5014ddcb30c7301a (patch) | |
tree | e4d74a2aa8fa09f65de5a9d065267d7dd6cff6c0 /youtube_dl | |
parent | 72b249bf1fa73b5e72d6573f3587a775a6c1c820 (diff) |
[pornhub] Fix comment count extraction (Closes #5320)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/pornhub.py | 17 |
1 file changed, 9 insertions, 8 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 3a27e3789..0c8b731cf 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):
     }
 
     def _extract_count(self, pattern, webpage, name):
-        count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
-        if count:
-            count = str_to_int(count)
-        return count
+        return str_to_int(self._search_regex(
+            pattern, webpage, '%s count' % name, fatal=False))
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):
         if thumbnail:
             thumbnail = compat_urllib_parse.unquote(thumbnail)
 
-        view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
-        like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
-        dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+        view_count = self._extract_count(
+            r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+        like_count = self._extract_count(
+            r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+        dislike_count = self._extract_count(
+            r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
         comment_count = self._extract_count(
-            r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
 
         video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
         if webpage.find('"encrypted":true') != -1: