diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-06-24 18:33:31 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-06-24 18:33:31 +0700 | 
| commit | bd65f181532ab4e535b408d3ccf99723534eb326 (patch) | |
| tree | 7c71ade20e913ff9cb539d33fa85b2e42526ce7e | |
| parent | 73af5cc817ff19d21cb432c5a4e9e37dd35a353d (diff) | |
[onetpl] Add support for videos embedded via pulsembed (closes #13482)
| -rw-r--r-- | youtube_dl/extractor/onet.py | 30 | 
1 files changed, 28 insertions, 2 deletions
```diff
diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py
index 94f57990b..58da1bc27 100644
--- a/youtube_dl/extractor/onet.py
+++ b/youtube_dl/extractor/onet.py
@@ -11,6 +11,7 @@ from ..utils import (
     get_element_by_class,
     int_or_none,
     js_to_json,
+    NO_DEFAULT,
     parse_iso8601,
     remove_start,
     strip_or_none,
@@ -199,6 +200,19 @@ class OnetPlIE(InfoExtractor):
             'timestamp': 1487078046,
         },
     }, {
+        # embedded via pulsembed
+        'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
+        'info_dict': {
+            'id': '501235.965429946',
+            'ext': 'mp4',
+            'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
+            'upload_date': '20170622',
+            'timestamp': 1498159955,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
         'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
         'only_matching': True,
     }, {
@@ -212,13 +226,25 @@ class OnetPlIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    def _search_mvp_id(self, webpage, default=NO_DEFAULT):
+        return self._search_regex(
+            r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
+            default=default)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
-        mvp_id = self._search_regex(
-            r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
+        mvp_id = self._search_mvp_id(webpage, default=None)
+
+        if not mvp_id:
+            pulsembed_url = self._search_regex(
+                r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
+                webpage, 'pulsembed url', group='url')
+            webpage = self._download_webpage(
+                pulsembed_url, video_id, 'Downloading pulsembed webpage')
+            mvp_id = self._search_mvp_id(webpage)
 
         return self.url_result(
             'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
```
