about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/utils.py')
-rw-r--r-- youtube_dl/utils.py 72
1 files changed, 63 insertions, 9 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b9bff5fde..64ab30910 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,19 +1,20 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
+import datetime
+import email.utils
import errno
import gzip
import io
import json
import locale
import os
+import platform
import re
+import socket
import sys
import traceback
import zlib
-import email.utils
-import socket
-import datetime
try:
import urllib.request as compat_urllib_request
@@ -36,6 +37,11 @@ except ImportError: # Python 2
from urlparse import urlparse as compat_urllib_parse_urlparse
try:
+ import urllib.parse as compat_urlparse
+except ImportError: # Python 2
+ import urlparse as compat_urlparse
+
+try:
import http.cookiejar as compat_cookiejar
except ImportError: # Python 2
import cookielib as compat_cookiejar
@@ -56,6 +62,11 @@ except ImportError: # Python 2
import httplib as compat_http_client
try:
+ from urllib.error import HTTPError as compat_HTTPError
+except ImportError: # Python 2
+ from urllib2 import HTTPError as compat_HTTPError
+
+try:
from subprocess import DEVNULL
compat_subprocess_get_DEVNULL = lambda: DEVNULL
except ImportError:
@@ -198,6 +209,20 @@ else:
with open(fn, 'w', encoding='utf-8') as f:
json.dump(obj, f)
+if sys.version_info >= (2,7):
+ def find_xpath_attr(node, xpath, key, val):
+ """ Find the xpath xpath[@key=val] """
+ assert re.match(r'^[a-zA-Z]+$', key)
+ assert re.match(r'^[a-zA-Z@\s]*$', val)
+ expr = xpath + u"[@%s='%s']" % (key, val)
+ return node.find(expr)
+else:
+ def find_xpath_attr(node, xpath, key, val):
+ for f in node.findall(xpath):
+ if f.attrib.get(key) == val:
+ return f
+ return None
+
def htmlentity_transform(matchobj):
"""Transforms an HTML entity to a character.
@@ -470,7 +495,7 @@ def make_HTTPS_handler(opts):
class ExtractorError(Exception):
"""Error during info extraction."""
- def __init__(self, msg, tb=None, expected=False):
+ def __init__(self, msg, tb=None, expected=False, cause=None):
""" tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
"""
@@ -478,11 +503,12 @@ class ExtractorError(Exception):
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
expected = True
if not expected:
- msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+ msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
super(ExtractorError, self).__init__(msg)
self.traceback = tb
self.exc_info = sys.exc_info() # preserve original exception
+ self.cause = cause
def format_traceback(self):
if self.traceback is None:
@@ -603,8 +629,23 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
old_resp = resp
# gzip
if resp.headers.get('Content-encoding', '') == 'gzip':
- gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
- resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+ content = resp.read()
+ gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
+ try:
+ uncompressed = io.BytesIO(gz.read())
+ except IOError as original_ioerror:
+ # There may be junk at the end of the file
+ # See http://stackoverflow.com/q/4928560/35070 for details
+ for i in range(1, 1024):
+ try:
+ gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
+ uncompressed = io.BytesIO(gz.read())
+ except IOError:
+ continue
+ break
+ else:
+ raise original_ioerror
+ resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
# deflate
if resp.headers.get('Content-encoding', '') == 'deflate':
@@ -631,12 +672,15 @@ def unified_strdate(date_str):
pass
return upload_date
-def determine_ext(url):
+def determine_ext(url, default_ext=u'unknown_video'):
guess = url.partition(u'?')[0].rpartition(u'.')[2]
if re.match(r'^[A-Za-z0-9]+$', guess):
return guess
else:
- return u'unknown_video'
+ return default_ext
+
+def subtitles_filename(filename, sub_lang, sub_format):
+ return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
def date_from_str(date_str):
"""
@@ -689,3 +733,13 @@ class DateRange(object):
return self.start <= date <= self.end
def __str__(self):
return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+
+
+def platform_name():
+ """ Returns the platform name as a compat_str """
+ res = platform.platform()
+ if isinstance(res, bytes):
+ res = res.decode(preferredencoding())
+
+ assert isinstance(res, compat_str)
+ return res