about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-11-15 12:54:13 +0100
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-11-15 12:54:13 +0100
commit: 78fb87b2837e15124b5855734a951598dfe025fe (patch)
tree: 202b4d8bec7917cfeb8a039ec1470874794685f3
parent: ab2d524780736249c8988313db021e83642c24d1 (diff)
download: youtube-dl-78fb87b2837e15124b5855734a951598dfe025fe.tar.xz
Don't accept '>' inside the content attribute in OpenGraph regexes
-rw-r--r-- youtube_dl/extractor/common.py 8
1 files changed, 5 insertions, 3 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e02176852..45dd01789 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -316,10 +316,12 @@ class InfoExtractor(object):
# Helper functions for extracting OpenGraph info
@staticmethod
def _og_regexes(prop):
- esc_prop = re.escape(prop)
+ content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
+ property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
+ template = r'<meta[^>]+?%s[^>]+?%s'
return [
- r'<meta[^>]+?property=[\'"]og:%s[\'"][^>]+?content=(?:"(.+?)"|\'(.+?)\')' % esc_prop,
- r'<meta[^>]+?content=(?:"(.+?)"|\'(.+?)\')[^>]+?property=[\'"]og:%s[\'"]' % esc_prop,
+ template % (property_re, content_re),
+ template % (content_re, property_re),
]
def _og_search_property(self, prop, html, name=None, **kargs):