diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-01-11 16:20:16 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-01-11 16:20:16 +0600 |
commit | a798e64c1549f4bdf240dec4492203b304a39783 (patch) | |
tree | 292f8d0bbe049f46e944d8d14a1f2302b37750ca | |
parent | 6a5fa754903370c8e522cb3c85fd1952c1d9ff9c (diff) |
[veehd] Improve extraction
-rw-r--r-- | youtube_dl/extractor/veehd.py | 35 |
1 file changed, 26 insertions, 9 deletions
```diff
diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py
index 4f76ca403..5dc38dd94 100644
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@@ -47,18 +47,35 @@ class VeeHDIE(InfoExtractor):
         self._download_webpage(player_url, video_id, 'Requesting player page')
         player_page = self._download_webpage(
             player_url, video_id, 'Downloading player page')
+
         config_json = self._search_regex(
-            r'value=\'config=({.+?})\'', player_page, 'config json')
-        config = json.loads(config_json)
+            r'value=\'config=({.+?})\'', player_page, 'config json', default=None)
+
+        if config_json:
+            config = json.loads(config_json)
+            video_url = compat_urlparse.unquote(config['clip']['url'])
+        else:
+            iframe_src = self._search_regex(
+                r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url')
+            iframe_url = 'http://veehd.com/%s' % iframe_src
+
+            self._download_webpage(iframe_url, video_id, 'Requesting iframe page')
+            iframe_page = self._download_webpage(
+                iframe_url, video_id, 'Downloading iframe page')
+
+            video_url = self._search_regex(
+                r"file\s*:\s*'([^']+)'", iframe_page, 'video url')
 
-        video_url = compat_urlparse.unquote(config['clip']['url'])
         title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
-        uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
-            webpage, 'uploader')
-        thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
-            webpage, 'thumbnail')
-        description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
-            webpage, 'description', flags=re.DOTALL)
+        uploader_id = self._html_search_regex(
+            r'<a href="/profile/\d+">(.+?)</a>',
+            webpage, 'uploader')
+        thumbnail = self._search_regex(
+            r'<img id="veehdpreview" src="(.+?)"',
+            webpage, 'thumbnail')
+        description = self._html_search_regex(
+            r'<td class="infodropdown".*?<div>(.*?)<ul',
+            webpage, 'description', flags=re.DOTALL)
 
         return {
             '_type': 'video',
```