diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-06-24 18:33:31 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-06-24 18:33:31 +0700 |
commit | bd65f181532ab4e535b408d3ccf99723534eb326 (patch) | |
tree | 7c71ade20e913ff9cb539d33fa85b2e42526ce7e | |
parent | 73af5cc817ff19d21cb432c5a4e9e37dd35a353d (diff) |
[onetpl] Add support for videos embedded via pulsembed (closes #13482)
-rw-r--r-- | youtube_dl/extractor/onet.py | 30 |
1 file changed, 28 insertions, 2 deletions
```diff
diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py
index 94f57990b..58da1bc27 100644
--- a/youtube_dl/extractor/onet.py
+++ b/youtube_dl/extractor/onet.py
@@ -11,6 +11,7 @@ from ..utils import (
     get_element_by_class,
     int_or_none,
     js_to_json,
+    NO_DEFAULT,
     parse_iso8601,
     remove_start,
     strip_or_none,
@@ -199,6 +200,19 @@ class OnetPlIE(InfoExtractor):
             'timestamp': 1487078046,
         },
     }, {
+        # embedded via pulsembed
+        'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
+        'info_dict': {
+            'id': '501235.965429946',
+            'ext': 'mp4',
+            'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
+            'upload_date': '20170622',
+            'timestamp': 1498159955,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
         'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
         'only_matching': True,
     }, {
@@ -212,13 +226,25 @@ class OnetPlIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    def _search_mvp_id(self, webpage, default=NO_DEFAULT):
+        return self._search_regex(
+            r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
+            default=default)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
-        mvp_id = self._search_regex(
-            r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
+        mvp_id = self._search_mvp_id(webpage, default=None)
+
+        if not mvp_id:
+            pulsembed_url = self._search_regex(
+                r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
+                webpage, 'pulsembed url', group='url')
+            webpage = self._download_webpage(
+                pulsembed_url, video_id, 'Downloading pulsembed webpage')
+            mvp_id = self._search_mvp_id(webpage)
 
         return self.url_result(
             'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
```