diff options
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/ultimedia.py | 91 |
2 files changed, 92 insertions, 0 deletions
# coding: utf-8
# Reconstructed from a flattened diff view of the change that adds
# youtube_dl/extractor/ultimedia.py (new file, index 000000000..97e4445d4).
#
# The same change registers the extractor in
# youtube_dl/extractor/__init__.py (index 73c17aa84..867e7c935) by adding
#     from .ultimedia import UltimediaIE
# directly after `from .ubu import UbuIE`, i.e. before
# `from .udemy import (UdemyIE, UdemyCourseIE ...)`.
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    qualities,
    unified_strdate,
    clean_html,
)


class UltimediaIE(InfoExtractor):
    """Information extractor for video pages hosted on ultimedia.com.

    A video page embeds a "deliver" iframe; that iframe page carries the
    jwplayer setup JSON from which the media URLs are read. Title,
    description and upload date are scraped from the outer video page.
    """

    # NOTE(review): the character class `[\d+a-z]` also admits a literal
    # '+', which looks unintended; it is kept as-is so that the set of
    # accepted URLs does not change.
    _VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)'
    _TESTS = [{
        # news
        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
        'md5': '276a0e49de58c7e85d32b057837952a2',
        'info_dict': {
            'id': 's8uk0r',
            'ext': 'mp4',
            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
            'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
            # Raw string: `\.` is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20150317',
        },
    }, {
        # music
        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
        'info_dict': {
            'id': 'xvpfp8',
            'ext': 'mp4',
            'title': "Two - C'est la vie (Clip)",
            'description': 'Two',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20150224',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the video page, follows its "deliver" iframe, parses the
        jwplayer setup object found there and returns a dict with the keys
        ``id``, ``title``, ``description``, ``thumbnail``, ``upload_date``
        and ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The player itself lives in an iframe served from /deliver/.
        deliver_url = self._search_regex(
            r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
            webpage, 'deliver URL')

        deliver_page = self._download_webpage(
            deliver_url, video_id, 'Downloading iframe page')

        player = self._parse_json(
            self._search_regex(
                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on",
                deliver_page, 'player'),
            video_id)

        # Later entries rank higher: 'html5' is preferred over 'flash'.
        quality = qualities(['flash', 'html5'])

        formats = []
        for mode in player.get('modes') or []:
            file_url = (mode.get('config') or {}).get('file')
            if not file_url:
                # A mode without a media URL cannot be downloaded; skip it
                # rather than aborting the whole extraction on a KeyError.
                continue
            mode_type = mode.get('type')
            formats.append({
                'url': file_url,
                'format_id': mode_type,
                'quality': quality(mode_type),
            })
        # An empty list is left for _sort_formats to deal with.
        self._sort_formats(formats)

        thumbnail = player.get('image')

        # _html_search_regex already returns HTML-cleaned text, so its
        # result is not passed through clean_html a second time (a second
        # pass could strip or unescape text that is literal after the
        # first one).
        title = self._html_search_regex(
            r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
            webpage, 'title', default=None)
        if not title:
            # Fallback: raw JS string from the iframe page, cleaned once.
            title = clean_html(self._search_regex(
                r"var\s+nameVideo\s*=\s*'([^']+)'",
                deliver_page, 'title'))

        description = self._html_search_regex(
            r'(?s)<span>Description</span>(.+?)</p>', webpage,
            'description', fatal=False)

        upload_date = unified_strdate(self._search_regex(
            r'Ajouté le\s*<span>([^<]+)', webpage,
            'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': formats,
        }