diff options
-rw-r--r-- | test/test_utils.py | 6 | ||||
-rw-r--r-- | youtube_dl/utils.py | 5 |
2 files changed, 7 insertions, 4 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 5a0109977..9a3a8ddff 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -641,8 +641,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
         self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'})  # XML
         self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
-        self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'}) # HTML 3.2
-        self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'}) # HTML 4.0
+        self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'})  # HTML 3.2
+        self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'})  # HTML 4.0
         self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
         self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
         self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
@@ -654,7 +654,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
         self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
         self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
-        self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'}) # Names lowercased
+        self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'})  # Names lowercased
         self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
         self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
         self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ec186918c..8ec1bd469 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -273,15 +273,17 @@ def get_element_by_attribute(attribute, value, html):
 
     return unescapeHTML(res)
 
+
 class HTMLAttributeParser(compat_HTMLParser):
     """Trivial HTML parser to gather the attributes for a single element"""
     def __init__(self):
-        self.attrs = { }
+        self.attrs = {}
         compat_HTMLParser.__init__(self)
 
     def handle_starttag(self, tag, attrs):
         self.attrs = dict(attrs)
 
+
 def extract_attributes(html_element):
     """Given a string for an HTML element such as
     <el
@@ -303,6 +305,7 @@ def extract_attributes(html_element):
     parser.close()
     return parser.attrs
 
+
 def clean_html(html):
     """Clean an HTML snippet into a readable string"""
 