diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-09-28 00:29:42 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-09-28 00:37:30 +0700 |
commit | 63d990d2859d0e981da2e416097655798334431b (patch) | |
tree | 67c50a9e1abe03abf95ed9ae1ddf7ce90dca724a /youtube_dl/extractor | |
parent | b14b2283a0b9f66f54e862641b5b1cd22c07bd62 (diff) |
[generic] Add support for Video.js embeds
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/generic.py | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 096e2aab4..a3d09a036 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -22,6 +22,8 @@ from ..utils import (
     HEADRequest,
     is_html,
     js_to_json,
+    KNOWN_EXTENSIONS,
+    mimetype2ext,
     orderedSet,
     sanitized_Request,
     smuggle_url,
@@ -1130,6 +1132,22 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
+        {
+            # Video.js embed
+            'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
+            'info_dict': {
+                'id': 'yygqldloqIk',
+                'ext': 'mp4',
+                'title': 'SolidWorks. Урок 6 Настройка чертежа',
+                'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
+                'upload_date': '20130314',
+                'uploader': 'PROстое3D',
+                'uploader_id': 'PROstoe3D',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         # rtl.nl embed
         {
             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
@@ -2880,6 +2898,44 @@ class GenericIE(InfoExtractor):
                 jwplayer_data, video_id, require_title=False, base_url=url)
             return merge_dicts(info, info_dict)
 
+        # Video.js embed
+        mobj = re.search(
+            r'(?s)\bvideojs\s*\(.+?\bplayer\.src\s*\(\s*(\[.+?\])\s*\)\s*;',
+            webpage)
+        if mobj is not None:
+            sources = self._parse_json(
+                mobj.group(1), video_id, transform_source=js_to_json,
+                fatal=False) or []
+            formats = []
+            for source in sources:
+                src = source.get('src')
+                if not src or not isinstance(src, compat_str):
+                    continue
+                src = compat_urlparse.urljoin(url, src)
+                src_type = source.get('type')
+                if isinstance(src_type, compat_str):
+                    src_type = src_type.lower()
+                ext = determine_ext(src).lower()
+                if src_type == 'video/youtube':
+                    return self.url_result(src, YoutubeIE.ie_key())
+                if src_type == 'application/dash+xml' or ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        src, video_id, mpd_id='dash', fatal=False))
+                elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        src, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                else:
+                    formats.append({
+                        'url': src,
+                        'ext': (mimetype2ext(src_type) or
+                                ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+                    })
+            if formats:
+                self._sort_formats(formats)
+                info_dict['formats'] = formats
+                return info_dict
+
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
             webpage, video_id, default={}, expected_type='VideoObject')