diff options
| -rw-r--r-- | youtube_dl/extractor/pornhub.py | 37 | 
1 files changed, 34 insertions, 3 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 9b413590a..eb316ad14 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -1,7 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import functools
 import itertools
+import operator
 # import os
 import re
 
@@ -129,9 +131,38 @@ class PornHubIE(InfoExtractor):
 
         tv_webpage = dl_webpage('tv')
 
-        video_url = self._search_regex(
-            r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
-            'video url', group='url')
+        encoded_url = self._search_regex(r'(var.*mediastring.*)</script>',
+            tv_webpage, 'encoded url')
+        assignments = encoded_url.split(";")
+        js_vars = {}
+
+        def parse_js_value(inp):
+            inp = re.sub(r'/\*[^*]*\*/', "", inp)
+
+            if "+" in inp:
+                inps = inp.split("+")
+                return functools.reduce(operator.concat, map(parse_js_value, inps))
+
+            inp = inp.strip()
+            if inp in js_vars:
+                return js_vars[inp]
+
+            # Hope it's a string!
+            assert inp.startswith('"') and inp.endswith('"')
+            return inp[1:-1]
+
+        for assn in assignments:
+            assn = assn.strip()
+            if len(assn) == 0:
+                continue
+
+            assert assn.startswith("var ")
+            assn = assn[4:]
+            vname, value = assn.split("=", 1)
+
+            js_vars[vname] = parse_js_value(value)
+
+        video_url = js_vars["mediastring"]
 
         title = self._search_regex(
             r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
