aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/utils.py
diff options
context:
space:
mode:
authorYen Chi Hsuan <yan12125@gmail.com>2017-09-16 12:18:38 +0800
committerYen Chi Hsuan <yan12125@gmail.com>2017-09-16 12:18:38 +0800
commit3869028ffb6be6ab719e5cf1004276dfdfd1216d (patch)
tree86301ec756a57fea9248042be358a42b6af6d7c2 /youtube_dl/utils.py
parent68d43a61b552007a718894967b869c0f1d8ff00f (diff)
[utils] Use bytes-like objects in dfxp2srt
This fixes handling of non-UTF8 TTML subtitles Closes #14191
Diffstat (limited to 'youtube_dl/utils.py')
-rw-r--r--youtube_dl/utils.py18
1 files changed, 11 insertions, 7 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 9e4492d40..b724e0b70 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2572,14 +2572,18 @@ def srt_subtitles_timecode(seconds):
def dfxp2srt(dfxp_data):
+ '''
+ @param dfxp_data A bytes-like object containing DFXP data
+ @returns A unicode object containing converted SRT data
+ '''
LEGACY_NAMESPACES = (
- ('http://www.w3.org/ns/ttml', [
- 'http://www.w3.org/2004/11/ttaf1',
- 'http://www.w3.org/2006/04/ttaf1',
- 'http://www.w3.org/2006/10/ttaf1',
+ (b'http://www.w3.org/ns/ttml', [
+ b'http://www.w3.org/2004/11/ttaf1',
+ b'http://www.w3.org/2006/04/ttaf1',
+ b'http://www.w3.org/2006/10/ttaf1',
]),
- ('http://www.w3.org/ns/ttml#styling', [
- 'http://www.w3.org/ns/ttml#style',
+ (b'http://www.w3.org/ns/ttml#styling', [
+ b'http://www.w3.org/ns/ttml#style',
]),
)
@@ -2674,7 +2678,7 @@ def dfxp2srt(dfxp_data):
for ns in v:
dfxp_data = dfxp_data.replace(ns, k)
- dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
+ dfxp = compat_etree_fromstring(dfxp_data)
out = []
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')