diff options
author | Random User <rndusr@posteo.de> | 2017-03-25 21:36:59 +0100 |
---|---|---|
committer | Random User <rndusr@posteo.de> | 2017-03-25 21:36:59 +0100 |
commit | 4f06c1c9fcbfbc74b81b5fa89a616914b5ce5aad (patch) | |
tree | a51b702e001d350b908780a119f76d8ea706d511 /youtube_dl/extractor/pornhub.py | |
parent | c73e330e7adc9c0c15ac51aeea8fbb7dad95351a (diff) | |
parent | 942b44a0525f677924c660bcb00902d705d91fc2 (diff) |
Merge branch 'master' of github.com-rndusr:rg3/youtube-dl into fix/str-item-assignment
Diffstat (limited to 'youtube_dl/extractor/pornhub.py')
-rw-r--r-- | youtube_dl/extractor/pornhub.py | 32 |
1 files changed, 29 insertions, 3 deletions
```diff
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 9b413590a..b25f1f193 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -1,7 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import functools
 import itertools
+import operator
 # import os
 import re
 
@@ -18,6 +20,7 @@ from ..utils import (
     js_to_json,
     orderedSet,
     # sanitized_Request,
+    remove_quotes,
     str_to_int,
 )
 # from ..aes import (
@@ -129,9 +132,32 @@ class PornHubIE(InfoExtractor):
 
         tv_webpage = dl_webpage('tv')
 
-        video_url = self._search_regex(
-            r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
-            'video url', group='url')
+        assignments = self._search_regex(
+            r'(var.+?mediastring.+?)</script>', tv_webpage,
+            'encoded url').split(';')
+
+        js_vars = {}
+
+        def parse_js_value(inp):
+            inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
+            if '+' in inp:
+                inps = inp.split('+')
+                return functools.reduce(
+                    operator.concat, map(parse_js_value, inps))
+            inp = inp.strip()
+            if inp in js_vars:
+                return js_vars[inp]
+            return remove_quotes(inp)
+
+        for assn in assignments:
+            assn = assn.strip()
+            if not assn:
+                continue
+            assn = re.sub(r'var\s+', '', assn)
+            vname, value = assn.split('=', 1)
+            js_vars[vname] = parse_js_value(value)
+
+        video_url = js_vars['mediastring']
 
         title = self._search_regex(
             r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
```