aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2014-09-13 09:09:55 +0200
committer Philipp Hagemeister <phihag@phihag.de> 2014-09-13 09:09:57 +0200
commit bf0ff93277ba36fbda70223ca7e78b5132e54ddf (patch)
tree 48944267037d6d8f6ae18533ed9b203444c3e9ee
parent dc752ff442f74926b3301cf26f6e418e5cf8ec7f (diff)
download youtube-dl-bf0ff93277ba36fbda70223ca7e78b5132e54ddf.tar.xz
[ard] Make more robust against missing thumbnails
I cannot reproduce this error myself; it comes from the Travis CI build.
-rw-r--r-- youtube_dl/extractor/ard.py 6
-rw-r--r-- youtube_dl/utils.py 11
2 files changed, 15 insertions, 2 deletions
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index ef94c7239..12457f0f9 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -13,6 +13,7 @@ from ..utils import (
int_or_none,
parse_duration,
unified_strdate,
+ xpath_text,
)
@@ -157,8 +158,9 @@ class ARDIE(InfoExtractor):
player_url = mobj.group('mainurl') + '~playerXml.xml'
doc = self._download_xml(player_url, display_id)
video_node = doc.find('./video')
- upload_date = unified_strdate(video_node.find('./broadcastDate').text)
- thumbnail = video_node.find('.//teaserImage//variant/url').text
+ upload_date = unified_strdate(xpath_text(
+ video_node, './broadcastDate'))
+ thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
formats = []
for a in video_node.findall('.//asset'):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 8828161e5..7536b3b36 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -304,6 +304,17 @@ def xpath_with_ns(path, ns_map):
return '/'.join(replaced)
+def xpath_text(node, xpath, name=None, fatal=False):
+ n = node.find(xpath)
+ if n is None:
+ if fatal:
+ name = xpath if name is None else name
+ raise ExtractorError('Could not find XML element %s' % name)
+ else:
+ return None
+ return n.text
+
+
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class BaseHTMLParser(compat_html_parser.HTMLParser):
def __init(self):