diff options
| -rw-r--r-- | youtube_dl/extractor/pornhub.py | 17 | ||||
| -rw-r--r-- | youtube_dl/extractor/xuite.py | 14 | 
2 files changed, 20 insertions, 11 deletions
| diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 3a27e3789..0c8b731cf 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):      }      def _extract_count(self, pattern, webpage, name): -        count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False) -        if count: -            count = str_to_int(count) -        return count +        return str_to_int(self._search_regex( +            pattern, webpage, '%s count' % name, fatal=False))      def _real_extract(self, url):          video_id = self._match_id(url) @@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):          if thumbnail:              thumbnail = compat_urllib_parse.unquote(thumbnail) -        view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view') -        like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like') -        dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike') +        view_count = self._extract_count( +            r'<span class="count">([\d,\.]+)</span> views', webpage, 'view') +        like_count = self._extract_count( +            r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like') +        dislike_count = self._extract_count( +            r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')          comment_count = self._extract_count( -            r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment') +            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')          video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))          if webpage.find('"encrypted":true') != -1: diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index 4971965f9..81d885fdc 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -69,18 +69,26 @@ class XuiteIE(InfoExtractor):          'only_matching': True,      }] +    @staticmethod +    def base64_decode_utf8(data): +        return base64.b64decode(data.encode('utf-8')).decode('utf-8') + +    @staticmethod +    def base64_encode_utf8(data): +        return base64.b64encode(data.encode('utf-8')).decode('utf-8') +      def _extract_flv_config(self, media_id): -        base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8') +        base64_media_id = self.base64_encode_utf8(media_id)          flv_config = self._download_xml(              'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,              'flv config')          prop_dict = {}          for prop in flv_config.findall('./property'): -            prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8') +            prop_id = self.base64_decode_utf8(prop.attrib['id'])              # CDATA may be empty in flv config              if not prop.text:                  continue -            encoded_content = base64.b64decode(prop.text).decode('utf-8') +            encoded_content = self.base64_decode_utf8(prop.text)              prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)          return prop_dict | 
