From 689fb748ee1ba8e61f99d21a3bcb1bc83b708649 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 11 Sep 2015 04:44:17 +0100 Subject: [utils] add extract_attributes for extracting html tags attributes --- youtube_dl/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'youtube_dl/utils.py') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 206dd56bc..bcebf9cc5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -248,6 +248,14 @@ def get_element_by_attribute(attribute, value, html): return unescapeHTML(res) +def extract_attributes(attributes_str, attributes_regex=r'(?s)\s*([^\s=]+)\s*=\s*["\']([^"\']+)["\']'): + attributes = re.findall(attributes_regex, attributes_str) + attributes_dict = {} + if attributes: + attributes_dict = {attribute_name: attribute_value for (attribute_name, attribute_value) in attributes} + return attributes_dict + + def clean_html(html): """Clean an HTML snippet into a readable string""" -- cgit v1.2.3 From 9550ca506fccf9c9d795816cc0a7817ff262ef45 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 31 Oct 2015 19:36:04 +0100 Subject: [utils] change extract_attributes to work in python 2 --- youtube_dl/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'youtube_dl/utils.py') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bcebf9cc5..518cea98b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -252,7 +252,8 @@ def extract_attributes(attributes_str, attributes_regex=r'(?s)\s*([^\s=]+)\s*=\s attributes = re.findall(attributes_regex, attributes_str) attributes_dict = {} if attributes: - attributes_dict = {attribute_name: attribute_value for (attribute_name, attribute_value) in attributes} + for (attribute_name, attribute_value) in attributes: + attributes_dict[attribute_name] = attribute_value return attributes_dict -- cgit v1.2.3