diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-04-15 21:50:15 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-04-15 21:50:51 +0700 |
commit | 8068296276657c9d888338c2211c112d69de6fc4 (patch) | |
tree | 0a5aa9d02b484ae83e214474803b70d5f1c5ebce /youtube_dl/extractor | |
parent | 4db79fa1bc482ef5d97c8b73f0bf3683d5cc383c (diff) |
[streamango] Improve extraction (closes #12643)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/streamango.py | 50 |
1 files changed, 30 insertions, 20 deletions
diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py
index a4ef06b66..aa4fad162 100644
--- a/youtube_dl/extractor/streamango.py
+++ b/youtube_dl/extractor/streamango.py
@@ -1,11 +1,18 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    js_to_json,
+)
 
 
 class StreamangoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>.+?)/(?:.+)'
+    _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
         'md5': 'e992787515a182f55e38fc97588d802a',
@@ -13,7 +20,6 @@ class StreamangoIE(InfoExtractor):
             'id': 'clapasobsptpkdfe',
             'ext': 'mp4',
             'title': '20170315_150006.mp4',
-            'url': r're:https://streamango\.com/v/d/clapasobsptpkdfe~[0-9]{10}~(?:[0-9]+\.){3}[0-9]+~.{8}/720',
         }
     }, {
         'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
@@ -21,29 +27,33 @@ class StreamangoIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        def extract_url(urltype):
-            return self._search_regex(
-                r'type\s*:\s*["\']{}["\']\s*,\s*src\s*:\s*["\'](?P<url>.+?)["\'].*'.format(urltype),
-                webpage, 'video URL', group='url')
-
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
 
         title = self._og_search_title(webpage)
-        url = 'https:' + extract_url('video/mp4')
-        dashurl = extract_url(r'application/dash\+xml')
-
-        formats = [{
-            'url': url,
-            'ext': 'mp4',
-            'width': 1280,
-            'height': 720,
-            'format_id': 'mp4',
-        }]
-
-        formats.extend(self._extract_mpd_formats(
-            dashurl, video_id, mpd_id='dash', fatal=False))
 
+        formats = []
+        for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
+            video = self._parse_json(
+                format_, video_id, transform_source=js_to_json, fatal=False)
+            if not video:
+                continue
+            src = video.get('src')
+            if not src:
+                continue
+            ext = determine_ext(src, default_ext=None)
+            if video.get('type') == 'application/dash+xml' or ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    src, video_id, mpd_id='dash', fatal=False))
+            else:
+                formats.append({
+                    'url': src,
+                    'ext': ext or 'mp4',
+                    'width': int_or_none(video.get('width')),
+                    'height': int_or_none(video.get('height')),
+                    'tbr': int_or_none(video.get('bitrate')),
+                })
         self._sort_formats(formats)
 
         return {