aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marcin Cieślak <saper@saper.info> 2013-11-05 23:19:29 +0100
committer: Marcin Cieślak <saper@saper.info> 2013-11-05 23:19:29 +0100
commit: a8eeb0597b11dbc9d1b48f95264cc2815311aa15 (patch)
tree: 63d322d9d1b1a4ccaf58c23ab98c33fc3b465fc5
parent: 7f34001d57671ad4713cb052632dd626d663d0ae (diff)
download: youtube-dl-a8eeb0597b11dbc9d1b48f95264cc2815311aa15.tar.xz
Fix AssertionError when og property not found
On tvp.pl some webpages contain OpenGraph metadata and some don't.
If the og property is not found, _og_search_description fails with:

WARNING: unable to extract OpenGraph description; please report this issue on http://yt-dl.org/bug
Traceback (most recent call last):
  File "/usr/home/saper/bin/youtube-dl", line 18, in <module>
    youtube_dl.main()
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/__init__.py", line 766, in main
    _real_main(argv)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/__init__.py", line 719, in _real_main
    retcode = ydl.download(all_urls)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/YoutubeDL.py", line 715, in download
    videos = self.extract_info(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/YoutubeDL.py", line 348, in extract_info
    ie_result = ie.extract(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 125, in extract
    return self._real_extract(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/tvp.py", line 56, in _real_extract
    info['description'] = self._og_search_description(webpage)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 331, in _og_search_description
    return self._og_search_property('description', html, fatal=False, **kargs)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 325, in _og_search_property
    return unescapeHTML(escaped)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/utils.py", line 494, in unescapeHTML
    assert type(s) == type(u'')
AssertionError

The patch allows me to use:

    try:
        info['description'] = self._og_search_description(webpage)
        info['thumbnail'] = self._og_search_thumbnail(webpage)
    except RegexNotFoundError:
        pass
-rw-r--r-- youtube_dl/extractor/common.py 4
1 files changed, 3 insertions, 1 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e0ccba533..fb2d50a09 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -322,7 +322,9 @@ class InfoExtractor(object):
if name is None:
name = 'OpenGraph %s' % prop
escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
- return unescapeHTML(escaped)
+ if not escaped is None:
+ return unescapeHTML(escaped)
+ return None
def _og_search_thumbnail(self, html, **kargs):
return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)