diff options
author | remitamine <remitamine@gmail.com> | 2015-12-03 20:05:11 +0100 |
---|---|---|
committer | remitamine <remitamine@gmail.com> | 2015-12-03 20:05:11 +0100 |
commit | 640bb54e73779f4a941eae1f17e4be049ca575db (patch) | |
tree | ba0c50db3ad5bb0932a56cda66d4f386b145bb22 /youtube_dl/extractor/movieclips.py | |
parent | 497f5fd93fe1efd0df8dc58d518c328ed1409457 (diff) | |
parent | e0977d7686e5df524b1a024484e7a4bb9cfa261d (diff) |
Merge branch 'master' of https://github.com/rg3/youtube-dl into bilibili
Diffstat (limited to 'youtube_dl/extractor/movieclips.py')
-rw-r--r-- | youtube_dl/extractor/movieclips.py | 80 |
1 files changed, 20 insertions, 60 deletions
diff --git a/youtube_dl/extractor/movieclips.py b/youtube_dl/extractor/movieclips.py
index 04e17d055..1564cb71f 100644
--- a/youtube_dl/extractor/movieclips.py
+++ b/youtube_dl/extractor/movieclips.py
@@ -1,80 +1,40 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-)
-from ..utils import (
-    ExtractorError,
-    clean_html,
-)
+from ..utils import sanitized_Request
 
 
 class MovieClipsIE(InfoExtractor):
-    _VALID_URL = r'https?://movieclips\.com/(?P<id>[\da-zA-Z]+)(?:-(?P<display_id>[\da-z-]+))?'
+    _VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/(?P<id>[^/?#]+)'
     _TEST = {
-        'url': 'http://movieclips.com/Wy7ZU-my-week-with-marilyn-movie-do-you-love-me/',
+        'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597?autoPlay=true&playlistId=5',
         'info_dict': {
-            'id': 'Wy7ZU',
-            'display_id': 'my-week-with-marilyn-movie-do-you-love-me',
+            'id': 'pKIGmG83AqD9',
+            'display_id': 'warcraft-trailer-1-561180739597',
             'ext': 'mp4',
-            'title': 'My Week with Marilyn - Do You Love Me?',
-            'description': 'md5:e86795bd332fe3cff461e7c8dc542acb',
+            'title': 'Warcraft Trailer 1',
+            'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
             'thumbnail': 're:^https?://.*\.jpg$',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
+        'add_ie': ['ThePlatform'],
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
-        show_id = display_id or video_id
-
-        config = self._download_xml(
-            'http://config.movieclips.com/player/config/%s' % video_id,
-            show_id, 'Downloading player config')
-
-        if config.find('./country-region').text == 'false':
-            raise ExtractorError(
-                '%s said: %s' % (self.IE_NAME, config.find('./region_alert').text), expected=True)
-
-        properties = config.find('./video/properties')
-        smil_file = properties.attrib['smil_file']
+        display_id = self._match_id(url)
 
-        smil = self._download_xml(smil_file, show_id, 'Downloading SMIL')
-        base_url = smil.find('./head/meta').attrib['base']
-
-        formats = []
-        for video in smil.findall('./body/switch/video'):
-            vbr = int(video.attrib['system-bitrate']) / 1000
-            src = video.attrib['src']
-            formats.append({
-                'url': base_url,
-                'play_path': src,
-                'ext': src.split(':')[0],
-                'vbr': vbr,
-                'format_id': '%dk' % vbr,
-            })
-
-        self._sort_formats(formats)
-
-        title = '%s - %s' % (properties.attrib['clip_movie_title'], properties.attrib['clip_title'])
-        description = clean_html(compat_str(properties.attrib['clip_description']))
-        thumbnail = properties.attrib['image']
-        categories = properties.attrib['clip_categories'].split(',')
+        req = sanitized_Request(url)
+        # it doesn't work if it thinks the browser it's too old
+        req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
+        webpage = self._download_webpage(req, display_id)
+        theplatform_link = self._html_search_regex(r'src="(http://player.theplatform.com/p/.*?)"', webpage, 'theplatform link')
+        title = self._html_search_regex(r'<title[^>]*>([^>]+)-\s*\d+\s*|\s*Movieclips.com</title>', webpage, 'title')
+        description = self._html_search_meta('description', webpage)
 
         return {
-            'id': video_id,
-            'display_id': display_id,
+            '_type': 'url_transparent',
+            'url': theplatform_link,
             'title': title,
+            'display_id': display_id,
             'description': description,
-            'thumbnail': thumbnail,
-            'categories': categories,
-            'formats': formats,
         }