aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: remitamine <remitamine@gmail.com> 2015-09-11 04:44:17 +0100
committer: remitamine <remitamine@gmail.com> 2015-09-11 04:44:17 +0100
commit: 689fb748ee1ba8e61f99d21a3bcb1bc83b708649 (patch)
tree: 368ea9a04adef249dcce3633cb54a9add8eb344c
parent: 1721fef28b89ac4264db978ab7fee3b4dd154056 (diff)
[utils] add extract_attributes for extracting HTML tag attributes
-rw-r--r-- youtube_dl/utils.py 8
1 files changed, 8 insertions, 0 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 206dd56bc..bcebf9cc5 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -248,6 +248,14 @@ def get_element_by_attribute(attribute, value, html):
return unescapeHTML(res)
+def extract_attributes(attributes_str, attributes_regex=r'(?s)\s*([^\s=]+)\s*=\s*["\']([^"\']+)["\']'):
+ attributes = re.findall(attributes_regex, attributes_str)
+ attributes_dict = {}
+ if attributes:
+ attributes_dict = {attribute_name: attribute_value for (attribute_name, attribute_value) in attributes}
+ return attributes_dict
+
+
def clean_html(html):
"""Clean an HTML snippet into a readable string"""