diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2014-08-24 04:47:18 +0200 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2014-08-24 04:47:28 +0200 |
commit | 4d54ef20a291eabc523119ca60ff05f0962771a4 (patch) | |
tree | 0ccb27f66094659aa6b9b4efc8a8714dbc199308 | |
parent | 54036b399188da60e8ca55c81b14031bc7891934 (diff) |
[ministrygrid] Add extractor (Fixes #2900)
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 13 | ||||
-rw-r--r-- | youtube_dl/extractor/ministrygrid.py | 57 |
3 files changed, 70 insertions, 1 deletion
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 52354e6e0..9dccbb8ed 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -184,6 +184,7 @@ from .malemotion import MalemotionIE from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE +from .ministrygrid import MinistryGridIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mitele import MiTeleIE from .mixcloud import MixcloudIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8b2d1d033..7fce564c6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -22,6 +22,7 @@ from ..utils import ( smuggle_url, unescapeHTML, unified_strdate, + unsmuggle_url, url_basename, ) from .brightcove import BrightcoveIE @@ -487,7 +488,14 @@ class GenericIE(InfoExtractor): else: assert ':' in default_search return self.url_result(default_search + url) - video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0] + + url, smuggled_data = unsmuggle_url(url) + force_videoid = None + if smuggled_data and 'force_videoid' in smuggled_data: + force_videoid = smuggled_data['force_videoid'] + video_id = force_videoid + else: + video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0] self.to_screen('%s: Requesting header' % video_id) @@ -498,6 +506,9 @@ class GenericIE(InfoExtractor): new_url = response.geturl() if url != new_url: self.report_following_redirect(new_url) + if force_videoid: + new_url = smuggle_url( + new_url, {'force_videoid': force_videoid}) return self.url_result(new_url) # Check for direct link to a video diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py new file mode 100644 index 000000000..949ad11db --- /dev/null +++ b/youtube_dl/extractor/ministrygrid.py @@ -0,0 +1,57 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + 
ExtractorError, + smuggle_url, +) + + +class MinistryGridIE(InfoExtractor): + _VALID_URL = r'https?://www\.ministrygrid.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])' + + _TEST = { + 'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers', + 'md5': '844be0d2a1340422759c2a9101bab017', + 'info_dict': { + 'id': '3453494717001', + 'ext': 'mp4', + 'title': 'The Gospel by Numbers', + 'description': 'Coming soon from T4G 2014!', + 'uploader': 'LifeWay Christian Resources (MG)', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + portlets_json = self._search_regex( + r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list') + portlets = json.loads(portlets_json) + pl_id = self._search_regex( + r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id') + + for i, portlet in enumerate(portlets): + portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet) + portlet_code = self._download_webpage( + portlet_url, video_id, + note='Looking in portlet %s (%d/%d)' % (portlet, i + 1, len(portlets)), + fatal=False) + video_iframe_url = self._search_regex( + r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe', + default=None) + if video_iframe_url: + surl = smuggle_url( + video_iframe_url, {'force_videoid': video_id}) + return { + '_type': 'url', + 'id': video_id, + 'url': surl, + } + + raise ExtractorError('Could not find video iframe in any portlets') |