about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author remitamine <remitamine@gmail.com> 2015-12-21 04:24:58 +0100
committer remitamine <remitamine@gmail.com> 2015-12-21 04:24:58 +0100
commit 7cb09524749d4a061acc801d0a2d6ad08463e549 (patch)
tree ab769aa8a8d1cce53103e848334bb79137f3d0b3 /youtube_dl/extractor
parent 78653a33aa00ba5205940c2baac5d9f019795b88 (diff)
[makertv] improve extraction
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/generic.py 6
-rw-r--r-- youtube_dl/extractor/jwplatform.py 10
-rw-r--r-- youtube_dl/extractor/makertv.py 13
3 files changed, 22 insertions, 7 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 4d38b0c9d..f5dd88f54 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -53,6 +53,7 @@ from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
+from .jwplatform import JWPlatformIE
class GenericIE(InfoExtractor):
@@ -1787,6 +1788,11 @@ class GenericIE(InfoExtractor):
if snagfilms_url:
return self.url_result(snagfilms_url)
+ # Look for JWPlatform embeds
+ jwplatform_url = JWPlatformIE._extract_url(webpage)
+ if jwplatform_url:
+ return self.url_result(jwplatform_url, 'JWPlatform')
+
# Look for ScreenwaveMedia embeds
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
if mobj is not None:
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index 3a3dc439a..cdc095a79 100644
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import int_or_none
@@ -23,7 +25,7 @@ class JWPlatformIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
mobj = re.search(
- r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8}',
+ r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
webpage)
if mobj:
return mobj.group('url')
@@ -42,7 +44,9 @@ class JWPlatformIE(InfoExtractor):
source_url = self._proto_relative_url(source['file'])
source_type = source.get('type') or ''
if source_type == 'application/vnd.apple.mpegurl':
- formats.extend(self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None))
+ m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
elif source_type.startswith('audio'):
formats.append({
'url': source_url,
@@ -57,7 +61,7 @@ class JWPlatformIE(InfoExtractor):
self._sort_formats(formats)
return {
- 'id': video_data['mediaid'],
+ 'id': video_id,
'title': video_data['title'],
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py
index 0256e4e24..3c34d4604 100644
--- a/youtube_dl/extractor/makertv.py
+++ b/youtube_dl/extractor/makertv.py
@@ -5,12 +5,12 @@ from .common import InfoExtractor
class MakerTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)?video|http://makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
+ _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_TEST = {
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
'info_dict': {
- 'id': 'brOEcGut',
+ 'id': 'Fh3QgymL9gsc',
'ext': 'mp4',
'title': 'Maze Runner: The Scorch Trials Official Movie Review',
'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
@@ -22,6 +22,11 @@ class MakerTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- jwplatform_id = self._search_regex([r'jwid="([^"]+)"', r'Maker.jw_id\s*=\s*"([^"]+)";'], webpage, 'jwplatform id')
+ jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
- return self.url_result('jwplatform:%s' % jwplatform_id, 'JWPlatform')
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': 'jwplatform:%s' % jwplatform_id,
+ 'ie_key': 'JWPlatform',
+ }