aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/utils.py')
-rw-r--r--youtube_dl/utils.py102
1 files changed, 87 insertions, 15 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c69d3e165..7d15eab64 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -371,6 +371,18 @@ def unescapeHTML(s):
r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
+def get_subprocess_encoding():
+ if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ # For subprocess calls, encode with locale encoding
+ # Refer to http://stackoverflow.com/a/9951851/35070
+ encoding = preferredencoding()
+ else:
+ encoding = sys.getfilesystemencoding()
+ if encoding is None:
+ encoding = 'utf-8'
+ return encoding
+
+
def encodeFilename(s, for_subprocess=False):
"""
@param s The name of the file
@@ -382,21 +394,24 @@ def encodeFilename(s, for_subprocess=False):
if sys.version_info >= (3, 0):
return s
- if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
- # Pass '' directly to use Unicode APIs on Windows 2000 and up
- # (Detecting Windows NT 4 is tricky because 'major >= 4' would
- # match Windows 9x series as well. Besides, NT 4 is obsolete.)
- if not for_subprocess:
- return s
- else:
- # For subprocess calls, encode with locale encoding
- # Refer to http://stackoverflow.com/a/9951851/35070
- encoding = preferredencoding()
- else:
- encoding = sys.getfilesystemencoding()
- if encoding is None:
- encoding = 'utf-8'
- return s.encode(encoding, 'ignore')
+ # Pass '' directly to use Unicode APIs on Windows 2000 and up
+ # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+ # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+ if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ return s
+
+ return s.encode(get_subprocess_encoding(), 'ignore')
+
+
+def decodeFilename(b, for_subprocess=False):
+
+ if sys.version_info >= (3, 0):
+ return b
+
+ if not isinstance(b, bytes):
+ return b
+
+ return b.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
@@ -408,6 +423,10 @@ def encodeArgument(s):
return encodeFilename(s, True)
+def decodeArgument(b):
+ return decodeFilename(b, True)
+
+
def decodeOption(optval):
if optval is None:
return optval
@@ -1791,6 +1810,59 @@ def match_filter_func(filter_str):
return _match_func
+def parse_dfxp_time_expr(time_expr):
+ if not time_expr:
+ return 0.0
+
+ mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
+ if mobj:
+ return float(mobj.group('time_offset'))
+
+ mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
+ if mobj:
+ return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
+
+
+def format_srt_time(seconds):
+ (mins, secs) = divmod(seconds, 60)
+ (hours, mins) = divmod(mins, 60)
+ millisecs = (secs - int(secs)) * 1000
+ secs = int(secs)
+ return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs)
+
+
+def dfxp2srt(dfxp_data):
+ _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
+
+ def parse_node(node):
+ str_or_empty = functools.partial(str_or_none, default='')
+
+ out = str_or_empty(node.text)
+
+ for child in node:
+ if child.tag == _x('ttml:br'):
+ out += '\n' + str_or_empty(child.tail)
+ elif child.tag == _x('ttml:span'):
+ out += str_or_empty(parse_node(child))
+ else:
+ out += str_or_empty(xml.etree.ElementTree.tostring(child))
+
+ return out
+
+ dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
+ out = []
+ paras = dfxp.findall(_x('.//ttml:p'))
+
+ for para, index in zip(paras, itertools.count(1)):
+ out.append('%d\n%s --> %s\n%s\n\n' % (
+ index,
+ format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))),
+ format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))),
+ parse_node(para)))
+
+ return ''.join(out)
+
+
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
def __init__(self, proxies=None):
# Set default handlers