diff options
-rw-r--r-- | youtube_dl/extractor/aol.py | 10 | ||||
-rw-r--r-- | youtube_dl/extractor/arte.py | 20 | ||||
-rw-r--r-- | youtube_dl/extractor/extractors.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/ministrygrid.py | 34 | ||||
-rw-r--r-- | youtube_dl/extractor/tdslifeway.py | 33 | ||||
-rw-r--r-- | youtube_dl/extractor/ubu.py | 57 | ||||
-rw-r--r-- | youtube_dl/extractor/wayofthemaster.py | 52 | ||||
-rw-r--r-- | youtube_dl/extractor/xboxclips.py | 2 |
8 files changed, 81 insertions, 131 deletions
diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index d4801a25b..24df8fe93 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -12,9 +12,10 @@ from ..utils import ( class AolIE(InfoExtractor): IE_NAME = 'on.aol.com' - _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[^/?-]+)' + _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/.*-)(?P<id>[^/?-]+)' _TESTS = [{ + # video with 5min ID 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', 'md5': '18ef68f48740e86ae94b98da815eec42', 'info_dict': { @@ -31,6 +32,7 @@ class AolIE(InfoExtractor): 'skip_download': True, } }, { + # video with vidible ID 'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183', 'info_dict': { 'id': '5707d6b8e4b090497b04f706', @@ -45,6 +47,12 @@ class AolIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + 'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944', + 'only_matching': True, + }, { + 'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index f042d9163..a9e3266dc 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -210,7 +210,7 @@ class ArteTVPlus7IE(InfoExtractor): # It also uses the arte_vp_url url from the webpage to extract the information class ArteTVCreativeIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:creative' - _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:magazine?/)?(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 
'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', @@ -229,9 +229,27 @@ class ArteTVCreativeIE(ArteTVPlus7IE): 'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n', 'upload_date': '20140805', } + }, { + 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', + 'only_matching': True, }] +class ArteTVInfoIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:info' + _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' + + _TEST = { + 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', + 'info_dict': { + 'id': '067528-000-A', + 'ext': 'mp4', + 'title': 'Service civique, un cache misère ?', + 'upload_date': '20160403', + }, + } + + class ArteTVFutureIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:future' _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b06b717f5..51c9a4719 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -46,6 +46,7 @@ from .arte import ( ArteTVPlus7IE, ArteTVCreativeIE, ArteTVConcertIE, + ArteTVInfoIE, ArteTVFutureIE, ArteTVCinemaIE, ArteTVDDCIE, @@ -733,6 +734,7 @@ from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .tapely import TapelyIE from .tass import TassIE +from .tdslifeway import TDSLifewayIE from .teachertube import ( TeacherTubeIE, TeacherTubeUserIE, @@ -835,7 +837,6 @@ from .twitter import ( TwitterIE, TwitterAmplifyIE, ) -from .ubu import UbuIE from .udemy import ( UdemyIE, UdemyCourseIE @@ -920,7 +921,6 @@ from .vulture import VultureIE from .walla import WallaIE from .washingtonpost import WashingtonPostIE from .wat import WatIE -from .wayofthemaster import WayOfTheMasterIE from .wdr import ( WDRIE, WDRMobileIE, diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py index 
949ad11db..e48eba3fa 100644 --- a/youtube_dl/extractor/ministrygrid.py +++ b/youtube_dl/extractor/ministrygrid.py @@ -1,8 +1,5 @@ from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -20,21 +17,28 @@ class MinistryGridIE(InfoExtractor): 'id': '3453494717001', 'ext': 'mp4', 'title': 'The Gospel by Numbers', + 'thumbnail': 're:^https?://.*\.jpg', + 'upload_date': '20140410', 'description': 'Coming soon from T4G 2014!', - 'uploader': 'LifeWay Christian Resources (MG)', + 'uploader_id': '2034960640001', + 'timestamp': 1397145591, + }, + 'params': { + # m3u8 download + 'skip_download': True, }, + 'add_ie': ['TDSLifeway'], } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - portlets_json = self._search_regex( - r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list') - portlets = json.loads(portlets_json) + portlets = self._parse_json(self._search_regex( + r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'), + video_id) pl_id = self._search_regex( - r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id') + r'getPlid:function\(\){return"(\d+)"}', webpage, 'p_l_id') for i, portlet in enumerate(portlets): portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet) @@ -46,12 +50,8 @@ class MinistryGridIE(InfoExtractor): r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe', default=None) if video_iframe_url: - surl = smuggle_url( - video_iframe_url, {'force_videoid': video_id}) - return { - '_type': 'url', - 'id': video_id, - 'url': surl, - } + return self.url_result( + smuggle_url(video_iframe_url, {'force_videoid': video_id}), + video_id=video_id) raise ExtractorError('Could not find video iframe in any portlets') diff --git a/youtube_dl/extractor/tdslifeway.py 
b/youtube_dl/extractor/tdslifeway.py new file mode 100644 index 000000000..4d1f5c801 --- /dev/null +++ b/youtube_dl/extractor/tdslifeway.py @@ -0,0 +1,33 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class TDSLifewayIE(InfoExtractor): + _VALID_URL = r'https?://tds\.lifeway\.com/v1/trainingdeliverysystem/courses/(?P<id>\d+)/index\.html' + + _TEST = { + # From http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers + 'url': 'http://tds.lifeway.com/v1/trainingdeliverysystem/courses/3453494717001/index.html?externalRegistration=AssetId%7C34F466F1-78F3-4619-B2AB-A8EFFA55E9E9%21InstanceId%7C0%21UserId%7Caaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa&grouping=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&activity_id=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&content_endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2Fcontent%2F&actor=%7B%22name%22%3A%5B%22Guest%20Guest%22%5D%2C%22account%22%3A%5B%7B%22accountServiceHomePage%22%3A%22http%3A%2F%2Fscorm.lifeway.com%2F%22%2C%22accountName%22%3A%22aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa%22%7D%5D%2C%22objectType%22%3A%22Agent%22%7D&content_token=462a50b2-b6f9-4970-99b1-930882c499fb&registration=93d6ec8e-7f7b-4ed3-bbc8-a857913c0b2a&externalConfiguration=access%7CFREE%21adLength%7C-1%21assignOrgId%7C4AE36F78-299A-425D-91EF-E14A899B725F%21assignOrgParentId%7C%21courseId%7C%21isAnonymous%7Cfalse%21previewAsset%7Cfalse%21previewLength%7C-1%21previewMode%7Cfalse%21royalty%7CFREE%21sessionId%7C671422F9-8E79-48D4-9C2C-4EE6111EA1CD%21trackId%7C&auth=Basic%20OjhmZjk5MDBmLTBlYTMtNDJhYS04YjFlLWE4MWQ3NGNkOGRjYw%3D%3D&endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2F', + 'info_dict': { + 'id': '3453494717001', + 'ext': 'mp4', + 'title': 'The Gospel by Numbers', + 'thumbnail': 're:^https?://.*\.jpg', + 'upload_date': '20140410', + 'description': 
'Coming soon from T4G 2014!', + 'uploader_id': '2034960640001', + 'timestamp': 1397145591, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['BrightcoveNew'], + } + + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2034960640001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + brightcove_id = self._match_id(url) + return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py deleted file mode 100644 index 1d52cbc98..000000000 --- a/youtube_dl/extractor/ubu.py +++ /dev/null @@ -1,57 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - qualities, -) - - -class UbuIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html' - _TEST = { - 'url': 'http://ubu.com/film/her_noise.html', - 'md5': '138d5652618bf0f03878978db9bef1ee', - 'info_dict': { - 'id': 'her_noise', - 'ext': 'm4v', - 'title': 'Her Noise - The Making Of (2007)', - 'duration': 3600, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex( - r'<title>.+?Film & Video: ([^<]+)</title>', webpage, 'title') - - duration = int_or_none(self._html_search_regex( - r'Duration: (\d+) minutes', webpage, 'duration', fatal=False), - invscale=60) - - formats = [] - FORMAT_REGEXES = [ - ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"), - ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'), - ] - preference = qualities([fid for fid, _ in FORMAT_REGEXES]) - for format_id, format_regex in FORMAT_REGEXES: - m = re.search(format_regex, webpage) - if m: - formats.append({ - 'url': m.group(1), - 'format_id': format_id, - 'preference': preference(format_id), - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 
'title': title, - 'duration': duration, - 'formats': formats, - } diff --git a/youtube_dl/extractor/wayofthemaster.py b/youtube_dl/extractor/wayofthemaster.py deleted file mode 100644 index af7bb8b49..000000000 --- a/youtube_dl/extractor/wayofthemaster.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class WayOfTheMasterIE(InfoExtractor): - _VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])' - - _TEST = { - 'url': 'http://www.wayofthemaster.com/hbks.shtml', - 'md5': '5316b57487ada8480606a93cb3d18d24', - 'info_dict': { - 'id': 'hbks', - 'ext': 'mp4', - 'title': 'Intelligent Design vs. Evolution', - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - - title = self._search_regex( - r'<img src="images/title_[^"]+".*?alt="([^"]+)"', - webpage, 'title', default=None) - if title is None: - title = self._html_search_regex( - r'<title>(.*?)</title>', webpage, 'page title') - - url_base = self._search_regex( - r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"', - webpage, 'URL base') - formats = [{ - 'format_id': 'low', - 'quality': 1, - 'url': url_base + '_low.mp4', - }, { - 'format_id': 'high', - 'quality': 2, - 'url': url_base + '_high.mp4', - }] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - } diff --git a/youtube_dl/extractor/xboxclips.py b/youtube_dl/extractor/xboxclips.py index 236ff403b..b113ab1c4 100644 --- a/youtube_dl/extractor/xboxclips.py +++ b/youtube_dl/extractor/xboxclips.py @@ -12,7 +12,7 @@ from ..utils import ( class XboxClipsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})' _TEST = { - 'url': 
'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325', + 'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325', 'md5': 'fbe1ec805e920aeb8eced3c3e657df5d', 'info_dict': { 'id': '074a69a9-5faf-46aa-b93b-9909c1720325', |