aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2014-11-02 17:28:42 +0100
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2014-11-02 19:31:06 +0100
commit4f195f55f0b734f2897319abf96c5542ce6212d4 (patch)
tree0ad95ea47420d7ac5a2101cc37cc792c19d1c9dd
parentac35c266868ddfd39f8b708f425e1baf2555e406 (diff)
Do not override stdlib html parser 'locatestarttagend' regex (fixes #4081)
'<a href="foo" ><img src="bar" / ></a>' wouldn't be parsed right (the problem is '/ >', '/>' worked fine). We need to change it in python 2.6 (for example the description of youtube videos wouldn't be extracted).
-rw-r--r--youtube_dl/utils.py4
1 files changed, 3 insertions, 1 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 233286de8..bdd637e48 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -152,7 +152,9 @@ def xpath_text(node, xpath, name=None, fatal=False):
return n.text
-compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
+if sys.version_info < (2, 7):
+ compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
+
class BaseHTMLParser(compat_html_parser.HTMLParser):
def __init(self):
compat_html_parser.HTMLParser.__init__(self)