about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Philipp Hagemeister <phihag@phihag.de> 2015-01-07 11:43:36 +0100
committer: Philipp Hagemeister <phihag@phihag.de> 2015-01-08 16:14:50 +0100
commit: bec2248141c9cc3e44794d7ee48a6bcfc4904eac (patch)
tree: 36da40044ca3f53b023dfebd8f5e79f3967bba68
parent: 211503c39f5ce6e875d18545007060c2764f6f7e (diff)
[InfoExtractor/common] Correct and test meta tag matching
-rw-r--r-- test/test_InfoExtractor.py 18
-rw-r--r-- youtube_dl/extractor/common.py 2
2 files changed, 19 insertions, 1 deletion
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 13c18ed95..be8d12997 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
+ def test_html_search_meta(self):
+ ie = self.ie
+ html = '''
+ <meta name="a" content="1" />
+ <meta name='b' content='2'>
+ <meta name="c" content='3'>
+ <meta name=d content='4'>
+ <meta property="e" content='5' >
+ <meta content="6" name="f">
+ '''
+
+ self.assertEqual(ie._html_search_meta('a', html), '1')
+ self.assertEqual(ie._html_search_meta('b', html), '2')
+ self.assertEqual(ie._html_search_meta('c', html), '3')
+ self.assertEqual(ie._html_search_meta('d', html), '4')
+ self.assertEqual(ie._html_search_meta('e', html), '5')
+ self.assertEqual(ie._html_search_meta('f', html), '6')
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index df32b5ca0..d703893dc 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -594,7 +594,7 @@ class InfoExtractor(object):
return self._html_search_regex(
r'''(?isx)<meta
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
- [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+ [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
html, display_name, fatal=fatal, group='content', **kwargs)
def _dc_search_uploader(self, html):