diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2017-05-26 21:58:18 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2017-05-26 21:58:18 +0800 | 
| commit | 5552c9eb0fece567f7dda13810939fca32d7d65a (patch) | |
| tree | f3bde11319d42fbe7013dc1326c83ad799c39892 | |
| parent | 59ed87cbd9ea08c889514a05b646141004f432a1 (diff) | |
[utils] Recognize more patterns in strip_jsonp()
Used in Youku Show pages
| -rw-r--r-- | ChangeLog | 1 | ||||
| -rw-r--r-- | test/test_utils.py | 8 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 7 | 
3 files changed, 15 insertions, 1 deletion
```diff
@@ -1,6 +1,7 @@
 version <unreleased>
 
 Core
++ [utils] strip_jsonp() can recognize more patterns
 * [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
 
 Extractors
diff --git a/test/test_utils.py b/test/test_utils.py
index f31559e71..d7e05817c 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -678,6 +678,14 @@ class TestUtil(unittest.TestCase):
         d = json.loads(stripped)
         self.assertEqual(d, {'status': 'success'})
 
+        stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
+        stripped = strip_jsonp('window.cb && cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
     def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape('aä'), 'aä')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4293a77f5..6c84bfe0f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2211,7 +2211,12 @@ def parse_age_limit(s):
 
 def strip_jsonp(code):
     return re.sub(
-        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
+        r'''(?sx)^
+            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
+            (?:\s*&&\s*(?P=func_name))?
+            \s*\(\s*(?P<callback_data>.*)\);?
+            \s*?(?://[^\n]*)*$''',
+        r'\g<callback_data>', code)
 
 
 def js_to_json(code):
```
