diff options
| author | Sergey M․ <dstftw@gmail.com> | 2020-06-06 02:14:35 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2020-06-06 02:14:35 +0700 | 
| commit | 562de77f41d0c08df9dbb08cfa86ba6c7d239c5a (patch) | |
| tree | 42c89944b847cc028c55656605577bb49e56e734 | |
| parent | e1723c4bac4e465991789b5a29beb946d872f508 (diff) | |
[kaltura] Add support for multiple embeds on a webpage (closes #25523)
| -rw-r--r-- | youtube_dl/extractor/generic.py | 18 | ||||
| -rw-r--r-- | youtube_dl/extractor/kaltura.py | 19 | 
2 files changed, 28 insertions, 9 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ce8252f6a..355067a50 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1709,6 +1709,15 @@ class GenericIE(InfoExtractor):              'add_ie': ['Kaltura'],          },          { +            # multiple kaltura embeds, nsfw +            'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html', +            'info_dict': { +                'id': 'kamila-avec-video-jaime-sadomie', +                'title': "Kamila avec vídeo “J'aime sadomie”", +            }, +            'playlist_count': 8, +        }, +        {              # Non-standard Vimeo embed              'url': 'https://openclassrooms.com/courses/understanding-the-web',              'md5': '64d86f1c7d369afd9a78b38cbb88d80a', @@ -2844,9 +2853,12 @@ class GenericIE(InfoExtractor):              return self.url_result(mobj.group('url'), 'Zapiks')          # Look for Kaltura embeds -        kaltura_url = KalturaIE._extract_url(webpage) -        if kaltura_url: -            return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) +        kaltura_urls = KalturaIE._extract_urls(webpage) +        if kaltura_urls: +            return self.playlist_from_matches( +                kaltura_urls, video_id, video_title, +                getter=lambda x: smuggle_url(x, {'source_url': url}), +                ie=KalturaIE.ie_key())          # Look for EaglePlatform embeds          eagleplatform_url = EaglePlatformIE._extract_url(webpage) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 2d38b758b..49d13460d 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -113,9 +113,14 @@ class KalturaIE(InfoExtractor):      @staticmethod      def _extract_url(webpage): +        urls = KalturaIE._extract_urls(webpage) +        return urls[0] if urls else None + +    @staticmethod +    def _extract_urls(webpage):          # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site -        mobj = ( -            re.search( +        finditer = ( +            re.finditer(                  r"""(?xs)                      kWidget\.(?:thumb)?[Ee]mbed\(                      \{.*? @@ -124,7 +129,7 @@ class KalturaIE(InfoExtractor):                          (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*                          (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})                  """, webpage) -            or re.search( +            or re.finditer(                  r'''(?xs)                      (?P<q1>["'])                          (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* @@ -138,7 +143,7 @@ class KalturaIE(InfoExtractor):                      )                      (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)                  ''', webpage) -            or re.search( +            or re.finditer(                  r'''(?xs)                      <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])                        (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) @@ -148,7 +153,8 @@ class KalturaIE(InfoExtractor):                      (?P=q1)                  ''', webpage)          ) -        if mobj: +        urls = [] +        for mobj in finditer:              embed_info = mobj.groupdict()              for k, v in embed_info.items():                  if v: @@ -160,7 +166,8 @@ class KalturaIE(InfoExtractor):                  webpage)              if service_mobj:                  url = smuggle_url(url, {'service_url': service_mobj.group('id')}) -            return url +            urls.append(url) +        return urls      def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):          params = actions[0]  | 
