about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Yen Chi Hsuan <yan12125@gmail.com> 2015-12-19 18:21:42 +0800
committer: Yen Chi Hsuan <yan12125@gmail.com> 2015-12-19 18:21:42 +0800
commit: d631d5f9f27f93767226192e4288990413fa9dbd (patch)
tree: 055755c210c91b43d2491087764560c2f9c0ab39
parent: 4f29fa99069760dc47ef9ca5dbf607a567d2982f (diff)
download: youtube-dl-d631d5f9f27f93767226192e4288990413fa9dbd.tar.xz
[utils] Fix TTML conversion
Tolerate invalid timestamps (closes #7909)
-rw-r--r-- test/test_utils.py 7
-rw-r--r-- youtube_dl/utils.py 11
2 files changed, 13 insertions, 5 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 8fc74e591..86045e680 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -661,8 +661,8 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
{'like_count': 190, 'dislike_count': 10}))
def test_parse_dfxp_time_expr(self):
- self.assertEqual(parse_dfxp_time_expr(None), 0.0)
- self.assertEqual(parse_dfxp_time_expr(''), 0.0)
+ self.assertEqual(parse_dfxp_time_expr(None), None)
+ self.assertEqual(parse_dfxp_time_expr(''), None)
self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
@@ -676,6 +676,9 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
<p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
<p begin="1" end="2">第二行<br/>♪♪</p>
<p begin="2" dur="1"><span>Third<br/>Line</span></p>
+ <p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
+ <p begin="-1" end="-1">Ignore, two</p>
+ <p begin="3" dur="-1">Ignored, three</p>
</div>
</body>
</tt>'''
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 91917fc96..ee20c3d9b 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1976,7 +1976,7 @@ def match_filter_func(filter_str):
def parse_dfxp_time_expr(time_expr):
if not time_expr:
- return 0.0
+ return
mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
if mobj:
@@ -2020,10 +2020,15 @@ def dfxp2srt(dfxp_data):
raise ValueError('Invalid dfxp/TTML subtitle')
for para, index in zip(paras, itertools.count(1)):
- begin_time = parse_dfxp_time_expr(para.attrib['begin'])
+ begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
end_time = parse_dfxp_time_expr(para.attrib.get('end'))
+ dur = parse_dfxp_time_expr(para.attrib.get('dur'))
+ if begin_time is None:
+ continue
if not end_time:
- end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
+ if not dur:
+ continue
+ end_time = begin_time + dur
out.append('%d\n%s --> %s\n%s\n\n' % (
index,
srt_subtitles_timecode(begin_time),