diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-09-28 00:29:42 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-09-28 00:37:30 +0700 | 
| commit | 63d990d2859d0e981da2e416097655798334431b (patch) | |
| tree | 67c50a9e1abe03abf95ed9ae1ddf7ce90dca724a | |
| parent | b14b2283a0b9f66f54e862641b5b1cd22c07bd62 (diff) | |
[generic] Add support for Video.js embeds
| -rw-r--r-- | youtube_dl/extractor/generic.py | 56 | 
1 file changed, 56 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 096e2aab4..a3d09a036 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -22,6 +22,8 @@ from ..utils import (
     HEADRequest,
     is_html,
     js_to_json,
+    KNOWN_EXTENSIONS,
+    mimetype2ext,
     orderedSet,
     sanitized_Request,
     smuggle_url,
@@ -1130,6 +1132,22 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
+        {
+            # Video.js embed
+            'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
+            'info_dict': {
+                'id': 'yygqldloqIk',
+                'ext': 'mp4',
+                'title': 'SolidWorks. Урок 6 Настройка чертежа',
+                'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
+                'upload_date': '20130314',
+                'uploader': 'PROстое3D',
+                'uploader_id': 'PROstoe3D',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         # rtl.nl embed
         {
             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
@@ -2880,6 +2898,44 @@ class GenericIE(InfoExtractor):
                 jwplayer_data, video_id, require_title=False, base_url=url)
             return merge_dicts(info, info_dict)
 
+        # Video.js embed
+        mobj = re.search(
+            r'(?s)\bvideojs\s*\(.+?\bplayer\.src\s*\(\s*(\[.+?\])\s*\)\s*;',
+            webpage)
+        if mobj is not None:
+            sources = self._parse_json(
+                mobj.group(1), video_id, transform_source=js_to_json,
+                fatal=False) or []
+            formats = []
+            for source in sources:
+                src = source.get('src')
+                if not src or not isinstance(src, compat_str):
+                    continue
+                src = compat_urlparse.urljoin(url, src)
+                src_type = source.get('type')
+                if isinstance(src_type, compat_str):
+                    src_type = src_type.lower()
+                ext = determine_ext(src).lower()
+                if src_type == 'video/youtube':
+                    return self.url_result(src, YoutubeIE.ie_key())
+                if src_type == 'application/dash+xml' or ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        src, video_id, mpd_id='dash', fatal=False))
+                elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        src, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                else:
+                    formats.append({
+                        'url': src,
+                        'ext': (mimetype2ext(src_type) or
+                                ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+                    })
+            if formats:
+                self._sort_formats(formats)
+                info_dict['formats'] = formats
+                return info_dict
+
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
             webpage, video_id, default={}, expected_type='VideoObject')
```
