diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-08-24 04:47:18 +0200 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-08-24 04:47:28 +0200 | 
| commit | 4d54ef20a291eabc523119ca60ff05f0962771a4 (patch) | |
| tree | 0ccb27f66094659aa6b9b4efc8a8714dbc199308 | |
| parent | 54036b399188da60e8ca55c81b14031bc7891934 (diff) | |
[ministrygrid] Add extractor (Fixes #2900)
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 13 | ||||
| -rw-r--r-- | youtube_dl/extractor/ministrygrid.py | 57 | 
3 files changed, 70 insertions, 1 deletion
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 52354e6e0..9dccbb8ed 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -184,6 +184,7 @@ from .malemotion import MalemotionIE  from .mdr import MDRIE  from .metacafe import MetacafeIE  from .metacritic import MetacriticIE +from .ministrygrid import MinistryGridIE  from .mit import TechTVMITIE, MITIE, OCWMITIE  from .mitele import MiTeleIE  from .mixcloud import MixcloudIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8b2d1d033..7fce564c6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -22,6 +22,7 @@ from ..utils import (      smuggle_url,      unescapeHTML,      unified_strdate, +    unsmuggle_url,      url_basename,  )  from .brightcove import BrightcoveIE @@ -487,7 +488,14 @@ class GenericIE(InfoExtractor):              else:                  assert ':' in default_search                  return self.url_result(default_search + url) -        video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0] + +        url, smuggled_data = unsmuggle_url(url) +        force_videoid = None +        if smuggled_data and 'force_videoid' in smuggled_data: +            force_videoid = smuggled_data['force_videoid'] +            video_id = force_videoid +        else: +            video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]          self.to_screen('%s: Requesting header' % video_id) @@ -498,6 +506,9 @@ class GenericIE(InfoExtractor):              new_url = response.geturl()              if url != new_url:                  self.report_following_redirect(new_url) +                if force_videoid: +                    new_url = smuggle_url( +                        new_url, {'force_videoid': force_videoid})                  return self.url_result(new_url)              # Check for direct link to a video diff --git a/youtube_dl/extractor/ministrygrid.py 
b/youtube_dl/extractor/ministrygrid.py new file mode 100644 index 000000000..949ad11db --- /dev/null +++ b/youtube_dl/extractor/ministrygrid.py @@ -0,0 +1,57 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    smuggle_url, +) + + +class MinistryGridIE(InfoExtractor): +    _VALID_URL = r'https?://www\.ministrygrid.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])' + +    _TEST = { +        'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers', +        'md5': '844be0d2a1340422759c2a9101bab017', +        'info_dict': { +            'id': '3453494717001', +            'ext': 'mp4', +            'title': 'The Gospel by Numbers', +            'description': 'Coming soon from T4G 2014!', +            'uploader': 'LifeWay Christian Resources (MG)', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        webpage = self._download_webpage(url, video_id) +        portlets_json = self._search_regex( +            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list') +        portlets = json.loads(portlets_json) +        pl_id = self._search_regex( +            r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id') + +        for i, portlet in enumerate(portlets): +            portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet) +            portlet_code = self._download_webpage( +                portlet_url, video_id, +                note='Looking in portlet %s (%d/%d)' % (portlet, i + 1, len(portlets)), +                fatal=False) +            video_iframe_url = self._search_regex( +                r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe', +                default=None) +            if video_iframe_url: +                surl = 
smuggle_url( +                    video_iframe_url, {'force_videoid': video_id}) +                return { +                    '_type': 'url', +                    'id': video_id, +                    'url': surl, +                } + +        raise ExtractorError('Could not find video iframe in any portlets')  | 
