aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2017-06-12 01:52:24 +0700
committer Sergey M․ <dstftw@gmail.com> 2017-06-12 01:52:24 +0700
commit b4a3d461e4a00dfc60047b667aa3136c8b03eda8 (patch)
tree a7caefe338ebec8d6a48047d34965488100d7971
parent 72b409559c984bb116015ada55acd38120b24cc0 (diff)
[utils] Handle HTMLParseError in extract_attributes (closes #13349)
-rw-r--r-- test/test_utils.py 2
-rw-r--r-- youtube_dl/utils.py 9
2 files changed, 9 insertions, 2 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 41b094d89..2b93b3604 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -916,6 +916,8 @@ class TestUtil(unittest.TestCase):
supports_outside_bmp = False
if supports_outside_bmp:
self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
+ # Malformed HTML should not break attributes extraction on older Python
+ self.assertEqual(extract_attributes('<mal"formed/>'), {})
def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 16bf49408..1973bd483 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -36,6 +36,7 @@ import xml.etree.ElementTree
import zlib
from .compat import (
+ compat_HTMLParseError,
compat_HTMLParser,
compat_basestring,
compat_chr,
@@ -409,8 +410,12 @@ def extract_attributes(html_element):
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
"""
parser = HTMLAttributeParser()
- parser.feed(html_element)
- parser.close()
+ try:
+ parser.feed(html_element)
+ parser.close()
+ # Older Python may throw HTMLParseError in case of malformed HTML
+ except compat_HTMLParseError:
+ pass
return parser.attrs