diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-08-05 20:32:44 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-08-05 20:32:44 +0600 |
commit | be7a8379b47c35afe66abcc02aee597e5143b1d5 (patch) | |
tree | 7fc7a58058660c236fbc448345d05af1eb9ac638 /youtube_dl/extractor | |
parent | defce603852e99655a6351bb73c62b88711a9c9e (diff) |
[xhamster] Make more robust
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/xhamster.py | 18 |
1 files changed, 10 insertions, 8 deletions
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 9d025530f..481d79b89 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -46,12 +46,12 @@ class XHamsterIE(InfoExtractor): ] def _real_extract(self, url): - def extract_video_url(webpage): - mp4 = re.search(r'file:\s+\'([^\']+)\'', webpage) - if mp4 is None: - raise ExtractorError('Unable to extract media URL') - else: - return mp4.group(1) + def extract_video_url(webpage, name): + return self._search_regex( + [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''', + r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''', + r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''], + webpage, name, group='mp4') def is_hd(webpage): return '<div class=\'icon iconHD\'' in webpage @@ -97,7 +97,9 @@ class XHamsterIE(InfoExtractor): hd = is_hd(webpage) - video_url = extract_video_url(webpage) + format_id = 'hd' if hd else 'sd' + + video_url = extract_video_url(webpage, format_id) formats = [{ 'url': video_url, 'format_id': 'hd' if hd else 'sd', @@ -108,7 +110,7 @@ class XHamsterIE(InfoExtractor): mrss_url = self._search_regex(r'<link rel="canonical" href="([^"]+)', webpage, 'mrss_url') webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage') if is_hd(webpage): - video_url = extract_video_url(webpage) + video_url = extract_video_url(webpage, 'hd') formats.append({ 'url': video_url, 'format_id': 'hd', |