aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2020-06-06 02:14:35 +0700
committerSergey M․ <dstftw@gmail.com>2020-06-06 02:14:35 +0700
commit562de77f41d0c08df9dbb08cfa86ba6c7d239c5a (patch)
tree42c89944b847cc028c55656605577bb49e56e734
parente1723c4bac4e465991789b5a29beb946d872f508 (diff)
[kaltura] Add support for multiple embeds on a webpage (closes #25523)
-rw-r--r--youtube_dl/extractor/generic.py18
-rw-r--r--youtube_dl/extractor/kaltura.py19
2 files changed, 28 insertions, 9 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index ce8252f6a..355067a50 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1709,6 +1709,15 @@ class GenericIE(InfoExtractor):
'add_ie': ['Kaltura'],
},
{
+ # multiple kaltura embeds, nsfw
+ 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
+ 'info_dict': {
+ 'id': 'kamila-avec-video-jaime-sadomie',
+ 'title': "Kamila avec vídeo “J'aime sadomie”",
+ },
+ 'playlist_count': 8,
+ },
+ {
# Non-standard Vimeo embed
'url': 'https://openclassrooms.com/courses/understanding-the-web',
'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
@@ -2844,9 +2853,12 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
- kaltura_url = KalturaIE._extract_url(webpage)
- if kaltura_url:
- return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
+ kaltura_urls = KalturaIE._extract_urls(webpage)
+ if kaltura_urls:
+ return self.playlist_from_matches(
+ kaltura_urls, video_id, video_title,
+ getter=lambda x: smuggle_url(x, {'source_url': url}),
+ ie=KalturaIE.ie_key())
# Look for EaglePlatform embeds
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 2d38b758b..49d13460d 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -113,9 +113,14 @@ class KalturaIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
+ urls = KalturaIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(webpage):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
- mobj = (
- re.search(
+ finditer = (
+ re.finditer(
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
@@ -124,7 +129,7 @@ class KalturaIE(InfoExtractor):
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage)
- or re.search(
+ or re.finditer(
r'''(?xs)
(?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
@@ -138,7 +143,7 @@ class KalturaIE(InfoExtractor):
)
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
''', webpage)
- or re.search(
+ or re.finditer(
r'''(?xs)
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
@@ -148,7 +153,8 @@ class KalturaIE(InfoExtractor):
(?P=q1)
''', webpage)
)
- if mobj:
+ urls = []
+ for mobj in finditer:
embed_info = mobj.groupdict()
for k, v in embed_info.items():
if v:
@@ -160,7 +166,8 @@ class KalturaIE(InfoExtractor):
webpage)
if service_mobj:
url = smuggle_url(url, {'service_url': service_mobj.group('id')})
- return url
+ urls.append(url)
+ return urls
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
params = actions[0]