about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2018-09-26 05:38:41 +0700
committer: Sergey M․ <dstftw@gmail.com> 2018-09-26 05:38:41 +0700
commit: 8fd12a083131550476fb771c180a0734794d0b9d (patch)
tree: a88f4aef1a1d93899d3af1fc6274ebe17d40eadd
parent: 60ce0c67fd1ef71463af2c036bbabf06ec26bd98 (diff)
[mediaset] Improve embed support (closes #17668)
-rw-r--r-- youtube_dl/extractor/generic.py | 2
-rw-r--r-- youtube_dl/extractor/mediaset.py | 38
2 files changed, 33 insertions, 7 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 76ef01332..2a48667f0 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -3023,7 +3023,7 @@ class GenericIE(InfoExtractor):
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
# Look for Mediaset embeds
- mediaset_urls = MediasetIE._extract_urls(webpage)
+ mediaset_urls = MediasetIE._extract_urls(self, webpage)
if mediaset_urls:
return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py
index 57f97409d..df3748798 100644
--- a/youtube_dl/extractor/mediaset.py
+++ b/youtube_dl/extractor/mediaset.py
@@ -4,6 +4,11 @@ from __future__ import unicode_literals
import re
from .theplatform import ThePlatformBaseIE
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_urlparse,
+)
from ..utils import (
ExtractorError,
int_or_none,
@@ -76,12 +81,33 @@ class MediasetIE(ThePlatformBaseIE):
}]
@staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1',
- webpage)]
+ def _extract_urls(ie, webpage):
+ def _qs(url):
+ return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
+ def _program_guid(qs):
+ return qs.get('programGuid', [None])[0]
+
+ entries = []
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
+ webpage):
+ embed_url = mobj.group('url')
+ embed_qs = _qs(embed_url)
+ program_guid = _program_guid(embed_qs)
+ if program_guid:
+ entries.append(embed_url)
+ continue
+ video_id = embed_qs.get('id', [None])[0]
+ if not video_id:
+ continue
+ urlh = ie._request_webpage(
+ embed_url, video_id, note='Following embed URL redirect')
+ embed_url = compat_str(urlh.geturl())
+ program_guid = _program_guid(_qs(embed_url))
+ if program_guid:
+ entries.append(embed_url)
+ return entries
def _real_extract(self, url):
guid = self._match_id(url)