diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-08-01 06:53:25 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-08-02 01:08:16 +0530 |
commit | bfd973ece3369c593b5e82a88cc16de80088a73e (patch) | |
tree | 6a61140e44f412d16ece6794b5b3e4ead4905b3c /yt_dlp/extractor/theplatform.py | |
parent | 1e8fe57e5cd0f33f940df87430d75e1230ec5b7a (diff) |
[extractors] Use new framework for existing embeds (#4307)
`Brightcove` is difficult to migrate because its subclasses may depend
on the signature of the current functions, so it is left as-is for now.
Note: Tests have not been migrated
Diffstat (limited to 'yt_dlp/extractor/theplatform.py')
-rw-r--r-- | yt_dlp/extractor/theplatform.py | 24 |
1 files changed, 10 insertions, 14 deletions
diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py
index bf7efc013..c8026d294 100644
--- a/yt_dlp/extractor/theplatform.py
+++ b/yt_dlp/extractor/theplatform.py
@@ -123,6 +123,13 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
            (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
          |theplatform:)(?P<id>[^/\?&]+)'''
+    _EMBED_REGEX = [
+        r'''(?x)
+            <meta\s+
+                property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
+                content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2''',
+        r'(?s)<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//player\.theplatform\.com/p/.+?)\1'
+    ]
 
     _TESTS = [{
         # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
@@ -192,22 +199,11 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
     }]
 
     @classmethod
-    def _extract_urls(cls, webpage):
-        m = re.search(
-            r'''(?x)
-                    <meta\s+
-                        property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
-                        content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
-            ''', webpage)
-        if m:
-            return [m.group('url')]
-
+    def _extract_embed_urls(cls, url, webpage):
         # Are whitespaces ignored in URLs?
         # https://github.com/ytdl-org/youtube-dl/issues/12044
-        matches = re.findall(
-            r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
-        if matches:
-            return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
+        for embed_url in super()._extract_embed_urls(url, webpage):
+            yield re.sub(r'\s', '', embed_url)
 
     @staticmethod
     def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):