diff options
author | remitamine <remitamine@gmail.com> | 2015-12-21 04:24:58 +0100 |
---|---|---|
committer | remitamine <remitamine@gmail.com> | 2015-12-21 04:24:58 +0100 |
commit | 7cb09524749d4a061acc801d0a2d6ad08463e549 (patch) | |
tree | ab769aa8a8d1cce53103e848334bb79137f3d0b3 /youtube_dl/extractor | |
parent | 78653a33aa00ba5205940c2baac5d9f019795b88 (diff) |
[makertv] improve extraction
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/generic.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/jwplatform.py | 10 | ||||
-rw-r--r-- | youtube_dl/extractor/makertv.py | 13 |
3 files changed, 22 insertions, 7 deletions
```diff
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 4d38b0c9d..f5dd88f54 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -53,6 +53,7 @@ from .onionstudios import OnionStudiosIE
 from .snagfilms import SnagFilmsEmbedIE
 from .screenwavemedia import ScreenwaveMediaIE
 from .mtv import MTVServicesEmbeddedIE
+from .jwplatform import JWPlatformIE
 
 
 class GenericIE(InfoExtractor):
@@ -1787,6 +1788,11 @@ class GenericIE(InfoExtractor):
         if snagfilms_url:
             return self.url_result(snagfilms_url)
 
+        # Look for JWPlatform embeds
+        jwplatform_url = JWPlatformIE._extract_url(webpage)
+        if jwplatform_url:
+            return self.url_result(jwplatform_url, 'JWPlatform')
+
         # Look for ScreenwaveMedia embeds
         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
         if mobj is not None:
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index 3a3dc439a..cdc095a79 100644
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import int_or_none
 
@@ -23,7 +25,7 @@ class JWPlatformIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8}',
+            r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
             webpage)
         if mobj:
             return mobj.group('url')
@@ -42,7 +44,9 @@ class JWPlatformIE(InfoExtractor):
             source_url = self._proto_relative_url(source['file'])
             source_type = source.get('type') or ''
             if source_type == 'application/vnd.apple.mpegurl':
-                formats.extend(self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None))
+                m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
+                if m3u8_formats:
+                    formats.extend(m3u8_formats)
             elif source_type.startswith('audio'):
                 formats.append({
                     'url': source_url,
@@ -57,7 +61,7 @@ class JWPlatformIE(InfoExtractor):
         self._sort_formats(formats)
 
         return {
-            'id': video_data['mediaid'],
+            'id': video_id,
             'title': video_data['title'],
             'description': video_data.get('description'),
             'thumbnail': self._proto_relative_url(video_data.get('image')),
diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py
index 0256e4e24..3c34d4604 100644
--- a/youtube_dl/extractor/makertv.py
+++ b/youtube_dl/extractor/makertv.py
@@ -5,12 +5,12 @@ from .common import InfoExtractor
 
 
 class MakerTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)?video|http://makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
+    _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
     _TEST = {
         'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
         'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
         'info_dict': {
-            'id': 'brOEcGut',
+            'id': 'Fh3QgymL9gsc',
             'ext': 'mp4',
             'title': 'Maze Runner: The Scorch Trials Official Movie Review',
             'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
@@ -22,6 +22,11 @@ class MakerTVIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        jwplatform_id = self._search_regex([r'jwid="([^"]+)"', r'Maker.jw_id\s*=\s*"([^"]+)";'], webpage, 'jwplatform id')
+        jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
 
-        return self.url_result('jwplatform:%s' % jwplatform_id, 'JWPlatform')
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'url': 'jwplatform:%s' % jwplatform_id,
+            'ie_key': 'JWPlatform',
+        }
```