diff options
author | Naglis Jonaitis <njonaitis@gmail.com> | 2015-03-26 17:15:27 +0200 |
---|---|---|
committer | Naglis Jonaitis <njonaitis@gmail.com> | 2015-03-26 17:15:27 +0200 |
commit | 91757b0f373ec3201f95066eeb0e09ebdcc1a067 (patch) | |
tree | efda2a6a08840285c1bc6b7fbc79081b12a297f3 | |
parent | fbfcc2972b3b24bda092eaed92b81113154c4327 (diff) |
[utils] Escape all HTML entities written in hexadecimal form
-rw-r--r-- | test/test_utils.py | 2 | ||||
-rw-r--r-- | youtube_dl/utils.py | 2 |
2 files changed, 3 insertions, 1 deletions
```diff
diff --git a/test/test_utils.py b/test/test_utils.py
index a8ab87685..abaf1ab73 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -200,6 +200,8 @@ class TestUtil(unittest.TestCase):
 
     def test_unescape_html(self):
         self.assertEqual(unescapeHTML('%20;'), '%20;')
+        self.assertEqual(unescapeHTML('&#x2F;'), '/')
+        self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(
             unescapeHTML('&eacute;'), 'é')
 
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 472d4df41..245d623d8 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -348,7 +348,7 @@ def _htmlentity_transform(entity):
     if entity in compat_html_entities.name2codepoint:
         return compat_chr(compat_html_entities.name2codepoint[entity])
 
-    mobj = re.match(r'#(x?[0-9]+)', entity)
+    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
     if mobj is not None:
         numstr = mobj.group(1)
         if numstr.startswith('x'):
```