diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/byutv.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/eporner.py | 22 | ||||
-rw-r--r-- | youtube_dl/extractor/espn.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/extractors.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/eyedotv.py | 64 | ||||
-rw-r--r-- | youtube_dl/extractor/formula1.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/groupon.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/howcast.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/ooyala.py | 24 | ||||
-rw-r--r-- | youtube_dl/extractor/radiocanada.py | 130 | ||||
-rw-r--r-- | youtube_dl/extractor/teachingchannel.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/veoh.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/vice.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/voxmedia.py | 12 | ||||
-rw-r--r-- | youtube_dl/extractor/xhamster.py | 73 |
15 files changed, 302 insertions, 56 deletions
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index dda98059e..3aec601f8 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)' _TEST = { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', + 'md5': '05850eb8c749e2ee05ad5a1c34668493', 'info_dict': { 'id': 'studio-c-season-5-episode-5', 'ext': 'mp4', @@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index e006921ec..581276694 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -11,8 +11,8 @@ from ..utils import ( class EpornerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)' + _TESTS = [{ 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', 'md5': '39d486f046212d8e1b911c52ab4691f8', 'info_dict': { @@ -23,8 +23,22 @@ class EpornerIE(InfoExtractor): 'duration': 1838, 'view_count': int, 'age_limit': 18, - } - } + }, + }, + # New (May 2016) URL layout + { + 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', + 'md5': '3469eeaa93b6967a34cdbdbb9d064b33', + 'info_dict': { + 'id': '3YRUtzMcWn0', + 'display_id': 'Star-Wars-XXX-Parody', + 'ext': 'mp4', + 'title': 'Star Wars XXX Parody', + 'duration': 361.0, + 'view_count': int, + 'age_limit': 18, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index db4b263bc..66c08bec4 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor): _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://espn.go.com/video/clip?id=10365079', + 'md5': '60e5d097a523e767d06479335d1bdc58', 'info_dict': { 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'ext': 'mp4', @@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor): 'description': None, }, 'params': { - # m3u8 download 'skip_download': True, }, + 'add_ie': ['OoyalaExternal'], }, { # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season 'url': 'http://espn.go.com/video/clip?id=2743663', + 'md5': 'f4ac89b59afc7e2d7dbb049523df6768', 'info_dict': { 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', 'ext': 'mp4', 'title': 'Must-See Moments: Best of the MLS season', }, 'params': { - # m3u8 download 'skip_download': True, }, + 'add_ie': ['OoyalaExternal'], }, { 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 'only_matching': True, diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d8b3170ba..05561149a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -231,6 +231,7 @@ from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE from .expotv import ExpoTVIE from .extremetube import ExtremeTubeIE +from .eyedotv import EyedoTVIE from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE @@ -617,6 +618,10 @@ from .qqmusic import ( QQMusicPlaylistIE, ) from .r7 import R7IE +from .radiocanada import ( + RadioCanadaIE, + RadioCanadaAudioVideoIE, +) from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE diff --git a/youtube_dl/extractor/eyedotv.py b/youtube_dl/extractor/eyedotv.py new file mode 100644 index 000000000..2f3035147 --- /dev/null +++ b/youtube_dl/extractor/eyedotv.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + parse_duration, + ExtractorError, +) + + +class EyedoTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301', + 'md5': 'ba14f17995cdfc20c36ba40e21bf73f7', + 'info_dict': { + 'id': '16301', + 'ext': 'mp4', + 'title': 'Journée du conseil scientifique de l\'Afnic 2015', + 'description': 'md5:4abe07293b2f73efc6e1c37028d58c98', + 'uploader': 'Afnic Live', + 'uploader_id': '8023', + } + } + _ROOT_URL = 'http://live.eyedo.net:1935/' + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id) + + def _add_ns(path): + return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api') + + title = xpath_text(video_data, _add_ns('Titre'), 'title', True) + state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'title', True) + if state_live_code == 'avenir': + raise ExtractorError( + '%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME, + expected=True) + + is_live = state_live_code == 'live' + m3u8_url = None + # http://eyedo.tv/Content/Html5/Scripts/html5view.js + if is_live: + if xpath_text(video_data, 'Cdn') == 'true': + m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id + else: + m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id + else: + m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id) + + return { + 'id': video_id, + 'title': title, + 'formats': self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'), + 'description': xpath_text(video_data, _add_ns('Description')), + 'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))), + 'uploader': xpath_text(video_data, _add_ns('Createur')), + 'uploader_id': xpath_text(video_data, _add_ns('CreateurId')), + 'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')), + 'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')), + } diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py index 726393fcc..322c41e5a 100644 --- a/youtube_dl/extractor/formula1.py +++ b/youtube_dl/extractor/formula1.py @@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor): 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', 'ext': 'flv', 'title': 'Race highlights - Spain 2016', - } + }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index 1dd0a81cc..a6da90931 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor): 'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors', }, 'playlist': [{ + 'md5': '42428ce8a00585f9bc36e49226eae7a1', 'info_dict': { 'id': 'fk6OhWpXgIQ', 'ext': 'mp4', @@ -24,10 +25,11 @@ class GrouponIE(InfoExtractor): 'uploader_id': 'groupon', 'uploader': 'Groupon', }, + 'add_ie': ['Youtube'], }], 'params': { 'skip_download': True, - } + }, } _PROVIDERS = { diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index e8f51e545..7e36b85ad 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)' _TEST = { 'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', - 'md5': '8b743df908c42f60cf6496586c7f12c3', + 'md5': '7d45932269a288149483144f01b99789', 'info_dict': { 'id': '390161', 'ext': 'mp4', @@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor): 'duration': 56.823, }, 'params': { - # m3u8 download 'skip_download': True, }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 09bc291f0..2038a6ba5 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -8,6 +8,7 @@ from ..utils import ( float_or_none, ExtractorError, unsmuggle_url, + determine_ext, ) from ..compat import compat_urllib_parse_urlencode @@ -37,26 +38,27 @@ class OoyalaBaseIE(InfoExtractor): formats = [] if cur_auth_data['authorized']: for stream in cur_auth_data['streams']: - url = base64.b64decode( + s_url = base64.b64decode( stream['url']['data'].encode('ascii')).decode('utf-8') - if url in urls: + if s_url in urls: continue - urls.append(url) + urls.append(s_url) + ext = determine_ext(s_url, None) delivery_type = stream['delivery_type'] - if delivery_type == 'hls' or '.m3u8' in url: + if delivery_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - url, embed_code, 'mp4', 'm3u8_native', + s_url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif delivery_type == 'hds' or '.f4m' in url: + elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( - url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif '.smil' in url: + s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif ext == 'smil': formats.extend(self._extract_smil_formats( - url, embed_code, fatal=False)) + s_url, embed_code, fatal=False)) else: formats.append({ - 'url': url, - 'ext': stream.get('delivery_type'), + 'url': s_url, + 'ext': ext or stream.get('delivery_type'), 'vcodec': stream.get('video_codec'), 'format_id': delivery_type, 'width': int_or_none(stream.get('width')), diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py new file mode 100644 index 000000000..4f05bbddc --- /dev/null +++ b/youtube_dl/extractor/radiocanada.py @@ -0,0 +1,130 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + find_xpath_attr, + determine_ext, + int_or_none, + unified_strdate, + xpath_element, + ExtractorError, +) + + +class RadioCanadaIE(InfoExtractor): + IE_NAME = 'radiocanada' + _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' + _TEST = { + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', + 'info_dict': { + 'id': '7184272', + 'ext': 'flv', + 'title': 'Le parcours du tireur capté sur vidéo', + 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', + 'upload_date': '20141023', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + app_code, video_id = re.match(self._VALID_URL, url).groups() + + formats = [] + # TODO: extract m3u8 and f4m formats + # m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements + # f4m formats can be extracted using flashhd device_type but they produce unplayable file + for device_type in ('flash',): + v_data = self._download_xml( + 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx', + video_id, note='Downloading %s XML' % device_type, query={ + 'appCode': app_code, + 'idMedia': video_id, + 'connectionType': 'broadband', + 'multibitrate': 'true', + 'deviceType': device_type, + # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction + 'paysJ391wsHjbOJwvCs26toz': 'CA', + 'bypasslock': 'NZt5K62gRqfc', + }) + v_url = xpath_text(v_data, 'url') + if not v_url: + continue + if v_url == 'null': + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, xpath_text(v_data, 'message')), expected=True) + ext = determine_ext(v_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + v_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False)) + else: + ext = determine_ext(v_url) + bitrates = xpath_element(v_data, 'bitrates') + for url_e in bitrates.findall('url'): + tbr = int_or_none(url_e.get('bitrate')) + if not tbr: + continue + formats.append({ + 'format_id': 'rtmp-%d' % tbr, + 'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url), + 'ext': 'flv', + 'protocol': 'rtmp', + 'width': int_or_none(url_e.get('width')), + 'height': int_or_none(url_e.get('height')), + 'tbr': tbr, + }) + self._sort_formats(formats) + + metadata = self._download_xml( + 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', + video_id, note='Downloading metadata XML', query={ + 'appCode': app_code, + 'idMedia': video_id, + }) + + def get_meta(name): + el = find_xpath_attr(metadata, './/Meta', 'name', name) + return el.text if el is not None else None + + return { + 'id': video_id, + 'title': get_meta('Title'), + 'description': get_meta('Description') or get_meta('ShortDescription'), + 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), + 'duration': int_or_none(get_meta('length')), + 'series': get_meta('Emission'), + 'season_number': int_or_none('SrcSaison'), + 'episode_number': int_or_none('SrcEpisode'), + 'upload_date': unified_strdate(get_meta('Date')), + 'formats': formats, + } + + +class RadioCanadaAudioVideoIE(InfoExtractor): + 'radiocanada:audiovideo' + _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', + 'info_dict': { + 'id': '7527184', + 'ext': 'flv', + 'title': 'Barack Obama au Vietnam', + 'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', + 'upload_date': '20160523', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index e0477382c..d14d93e3a 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor): _TEST = { 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', + 'md5': '3d6361864d7cac20b57c8784da17166f', 'info_dict': { 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', 'ext': 'mp4', @@ -19,9 +20,9 @@ class TeachingChannelIE(InfoExtractor): 'duration': 422.255, }, 'params': { - # m3u8 download 'skip_download': True, }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index 23ce0a0d1..0f5d68738 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -37,6 +37,7 @@ class VeohIE(InfoExtractor): 'uploader': 'afp-news', 'duration': 123, }, + 'skip': 'This video has been deleted.', }, { 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 95daf4dfd..e2b2ce098 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -11,12 +11,14 @@ class ViceIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.vice.com/video/cowboy-capitalists-part-1', + 'md5': 'e9d77741f9e42ba583e683cd170660f7', 'info_dict': { 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', 'ext': 'flv', 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', 'duration': 725.983, }, + 'add_ie': ['Ooyala'], }, { 'url': 'http://www.vice.com/video/how-to-hack-a-car', 'md5': '6fb2989a3fed069fb8eab3401fc2d3c9', @@ -29,6 +31,7 @@ class ViceIE(InfoExtractor): 'uploader': 'Motherboard', 'upload_date': '20140529', }, + 'add_ie': ['Youtube'], }, { 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', 'only_matching': True, diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index 9d73600aa..b1b32ad44 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Google\'s new material design direction', 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', - } + }, + 'add_ie': ['Ooyala'], }, { # data-ooyala-id 'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet', @@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Nexus 6: hands-on with Google\'s phablet', 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', - } + }, + 'add_ie': ['Ooyala'], }, { # volume embed 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', @@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'The new frontier of LGBTQ civil rights, explained', 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f', - } + }, + 'add_ie': ['Ooyala'], }, { # youtube embed 'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance', @@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor): 'upload_date': '20160324', 'uploader_id': 'voxdotcom', 'uploader': 'Vox', - } + }, + 'add_ie': ['Youtube'], }, { # SBN.VideoLinkset.entryGroup multiple ooyala embeds 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index b3547174d..bd8e1af2e 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -12,37 +12,52 @@ from ..utils import ( class XHamsterIE(InfoExtractor): - _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' - _TESTS = [ - { - 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', - 'info_dict': { - 'id': '1509445', - 'ext': 'mp4', - 'title': 'FemaleAgent Shy beauty takes the bait', - 'upload_date': '20121014', - 'uploader': 'Ruseful2011', - 'duration': 893.52, - 'age_limit': 18, - } + _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?' + _TESTS = [{ + 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', + 'md5': '8281348b8d3c53d39fffb377d24eac4e', + 'info_dict': { + 'id': '1509445', + 'ext': 'mp4', + 'title': 'FemaleAgent Shy beauty takes the bait', + 'upload_date': '20121014', + 'uploader': 'Ruseful2011', + 'duration': 893.52, + 'age_limit': 18, }, - { - 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', - 'info_dict': { - 'id': '2221348', - 'ext': 'mp4', - 'title': 'Britney Spears Sexy Booty', - 'upload_date': '20130914', - 'uploader': 'jojo747400', - 'duration': 200.48, - 'age_limit': 18, - } + }, { + 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', + 'info_dict': { + 'id': '2221348', + 'ext': 'mp4', + 'title': 'Britney Spears Sexy Booty', + 'upload_date': '20130914', + 'uploader': 'jojo747400', + 'duration': 200.48, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, + }, { + # empty seo + 'url': 'http://xhamster.com/movies/5667973/.html', + 'info_dict': { + 'id': '5667973', + 'ext': 'mp4', + 'title': '....', + 'upload_date': '20160208', + 'uploader': 'parejafree', + 'duration': 72.0, + 'age_limit': 18, }, - { - 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', - 'only_matching': True, + 'params': { + 'skip_download': True, }, - ] + }, { + 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', + 'only_matching': True, + }] def _real_extract(self, url): def extract_video_url(webpage, name): @@ -170,7 +185,7 @@ class XHamsterEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id, + r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id, webpage, 'xhamster url', default=None) if not video_url: |