aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--  test/test_utils.py   6
-rw-r--r--  youtube_dl/utils.py  5
2 files changed, 7 insertions, 4 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 5a0109977..9a3a8ddff 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -641,8 +641,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'})
self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'}) # XML
self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
- self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'}) # HTML 3.2
- self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'}) # HTML 4.0
+ self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'})  # HTML 3.2
+ self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'})  # HTML 4.0
self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
@@ -654,7 +654,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
- self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'}) # Names lowercased
+ self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'})  # Names lowercased
self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ec186918c..8ec1bd469 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -273,15 +273,17 @@ def get_element_by_attribute(attribute, value, html):
return unescapeHTML(res)
+
class HTMLAttributeParser(compat_HTMLParser):
"""Trivial HTML parser to gather the attributes for a single element"""
def __init__(self):
- self.attrs = { }
+ self.attrs = {}
compat_HTMLParser.__init__(self)
def handle_starttag(self, tag, attrs):
self.attrs = dict(attrs)
+
def extract_attributes(html_element):
"""Given a string for an HTML element such as
<el
@@ -303,6 +305,7 @@ def extract_attributes(html_element):
parser.close()
return parser.attrs
+
def clean_html(html):
"""Clean an HTML snippet into a readable string"""