aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYen Chi Hsuan <yan12125@gmail.com>2017-02-17 23:13:51 +0800
committerYen Chi Hsuan <yan12125@gmail.com>2017-02-17 23:13:51 +0800
commitfef51645d6c224f898ff6f44d041a458d21e8547 (patch)
tree0acf97ee5884fac9ddceffcaf178bdcf3aa5581b
parent4cead6a614b5a293e78dce5cd5eda7476f83985d (diff)
downloadyoutube-dl-fef51645d6c224f898ff6f44d041a458d21e8547.tar.xz
[theplatform] Recognize URLs with whitespaces (closes #12044)
-rw-r--r--ChangeLog1
-rw-r--r--youtube_dl/extractor/generic.py7
-rw-r--r--youtube_dl/extractor/theplatform.py6
3 files changed, 11 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 4e69b03d0..d5fe3dd5b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
version <unreleased>
Extractors
++ [theplatform] Recognize URLs with whitespaces (#12044)
+ [generic] Support complex JWPlayer embedded videos (#12030)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 3db31debe..9868ca6d0 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1501,7 +1501,12 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
'add_ie': [VideoPressIE.ie_key()],
- }
+ },
+ {
+ # ThePlatform embedded with whitespaces in URLs
+ 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
+ 'only_matching': True,
+ },
# {
# # TODO: find another test
# # http://schema.org/VideoObject
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 5c5987c6a..9a424b1c6 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -179,10 +179,12 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
if m:
return [m.group('url')]
+ # Are whitespaces ignored in URLs?
+ # https://github.com/rg3/youtube-dl/issues/12044
matches = re.findall(
- r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
+ r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
if matches:
- return list(zip(*matches))[1]
+ return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):