diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-06-12 01:52:24 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-06-12 01:52:24 +0700 | 
| commit | b4a3d461e4a00dfc60047b667aa3136c8b03eda8 (patch) | |
| tree | a7caefe338ebec8d6a48047d34965488100d7971 | |
| parent | 72b409559c984bb116015ada55acd38120b24cc0 (diff) | |
[utils] Handle HTMLParseError in extract_attributes (closes #13349)
| -rw-r--r-- | test/test_utils.py | 2 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 9 | 
2 files changed, 9 insertions, 2 deletions
```diff
diff --git a/test/test_utils.py b/test/test_utils.py
index 41b094d89..2b93b3604 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -916,6 +916,8 @@ class TestUtil(unittest.TestCase):
             supports_outside_bmp = False
         if supports_outside_bmp:
             self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
+        # Malformed HTML should not break attributes extraction on older Python
+        self.assertEqual(extract_attributes('<mal"formed/>'), {})
 
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 16bf49408..1973bd483 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -36,6 +36,7 @@ import xml.etree.ElementTree
 import zlib
 
 from .compat import (
+    compat_HTMLParseError,
     compat_HTMLParser,
     compat_basestring,
     compat_chr,
@@ -409,8 +410,12 @@ def extract_attributes(html_element):
     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
     """
     parser = HTMLAttributeParser()
-    parser.feed(html_element)
-    parser.close()
+    try:
+        parser.feed(html_element)
+        parser.close()
+    # Older Python may throw HTMLParseError in case of malformed HTML
+    except compat_HTMLParseError:
+        pass
     return parser.attrs
```
