diff options
Diffstat (limited to 'youtube_dl/extractor')
| -rw-r--r-- | youtube_dl/extractor/pornhub.py | 47 | 
1 files changed, 37 insertions, 10 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index ba0ad7da2..75ed69cde 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -227,12 +227,13 @@ class PornHubIE(PornHubBaseIE):          else:              thumbnail, duration = [None] * 2 -        if not video_urls: -            tv_webpage = dl_webpage('tv') - +        def extract_js_vars(webpage, pattern, fatal=True):              assignments = self._search_regex( -                r'(var.+?mediastring.+?)</script>', tv_webpage, -                'encoded url').split(';') +                pattern, webpage, 'encoded url', fatal=fatal) +            if not assignments: +                return {} + +            assignments = assignments.split(';')              js_vars = {} @@ -254,11 +255,31 @@ class PornHubIE(PornHubBaseIE):                  assn = re.sub(r'var\s+', '', assn)                  vname, value = assn.split('=', 1)                  js_vars[vname] = parse_js_value(value) +            return js_vars -            video_url = js_vars['mediastring'] -            if video_url not in video_urls_set: -                video_urls.append((video_url, None)) -                video_urls_set.add(video_url) +        def add_video_url(video_url): +            v_url = url_or_none(video_url) +            if not v_url: +                return +            if v_url in video_urls_set: +                return +            video_urls.append((v_url, None)) +            video_urls_set.add(v_url) + +        if not video_urls: +            FORMAT_PREFIXES = ('media', 'quality') +            js_vars = extract_js_vars( +                webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES), +                fatal=False) +            if js_vars: +                for key, format_url in js_vars.items(): +                    if any(key.startswith(p) for p in FORMAT_PREFIXES): +                        add_video_url(format_url) + +        if not video_urls: +            js_vars = extract_js_vars( +                dl_webpage('tv'), r'(var.+?mediastring.+?)</script>') +            add_video_url(js_vars['mediastring'])          for mobj in re.finditer(                  r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1', @@ -276,10 +297,16 @@ class PornHubIE(PornHubBaseIE):                      r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)                  if upload_date:                      upload_date = upload_date.replace('/', '') -            if determine_ext(video_url) == 'mpd': +            ext = determine_ext(video_url) +            if ext == 'mpd':                  formats.extend(self._extract_mpd_formats(                      video_url, video_id, mpd_id='dash', fatal=False))                  continue +            elif ext == 'm3u8': +                formats.extend(self._extract_m3u8_formats( +                    video_url, video_id, 'mp4', entry_protocol='m3u8_native', +                    m3u8_id='hls', fatal=False)) +                continue              tbr = None              mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)              if mobj:  | 
