diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-11-15 12:54:13 +0100 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-11-15 12:54:13 +0100 |
commit | 78fb87b2837e15124b5855734a951598dfe025fe (patch) | |
tree | 202b4d8bec7917cfeb8a039ec1470874794685f3 /youtube_dl/extractor/common.py | |
parent | ab2d524780736249c8988313db021e83642c24d1 (diff) |
Don't accept '>' inside the content attribute in OpenGraph regexes
Diffstat (limited to 'youtube_dl/extractor/common.py')
-rw-r--r-- | youtube_dl/extractor/common.py | 8 |
1 file changed, 5 insertions, 3 deletions
```diff
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e02176852..45dd01789 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -316,10 +316,12 @@ class InfoExtractor(object):
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
-        esc_prop = re.escape(prop)
+        content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
+        property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
+        template = r'<meta[^>]+?%s[^>]+?%s'
         return [
-            r'<meta[^>]+?property=[\'"]og:%s[\'"][^>]+?content=(?:"(.+?)"|\'(.+?)\')' % esc_prop,
-            r'<meta[^>]+?content=(?:"(.+?)"|\'(.+?)\')[^>]+?property=[\'"]og:%s[\'"]' % esc_prop,
+            template % (property_re, content_re),
+            template % (content_re, property_re),
         ]
 
     def _og_search_property(self, prop, html, name=None, **kargs):
```