diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2015-01-07 11:43:36 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2015-01-08 16:14:50 +0100 |
commit | bec2248141c9cc3e44794d7ee48a6bcfc4904eac (patch) | |
tree | 36da40044ca3f53b023dfebd8f5e79f3967bba68 | |
parent | 211503c39f5ce6e875d18545007060c2764f6f7e (diff) |
[InfoExtractor/common] Correct and test meta tag matching
-rw-r--r-- | test/test_InfoExtractor.py | 18 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 2 |
2 files changed, 19 insertions, 1 deletion
```diff
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 13c18ed95..be8d12997 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
 
+    def test_html_search_meta(self):
+        ie = self.ie
+        html = '''
+            <meta name="a" content="1" />
+            <meta name='b' content='2'>
+            <meta name="c" content='3'>
+            <meta name=d content='4'>
+            <meta property="e" content='5' >
+            <meta content="6" name="f">
+        '''
+
+        self.assertEqual(ie._html_search_meta('a', html), '1')
+        self.assertEqual(ie._html_search_meta('b', html), '2')
+        self.assertEqual(ie._html_search_meta('c', html), '3')
+        self.assertEqual(ie._html_search_meta('d', html), '4')
+        self.assertEqual(ie._html_search_meta('e', html), '5')
+        self.assertEqual(ie._html_search_meta('f', html), '6')
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index df32b5ca0..d703893dc 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -594,7 +594,7 @@ class InfoExtractor(object):
         return self._html_search_regex(
             r'''(?isx)<meta
                     (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
-                    [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
             html, display_name, fatal=fatal, group='content', **kwargs)
 
     def _dc_search_uploader(self, html):
```