diff options
Diffstat (limited to 'youtube_dl/utils.py')
| -rw-r--r-- | youtube_dl/utils.py | 140 | 
1 files changed, 107 insertions, 33 deletions
| diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b97e62ae9..42ad520f9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -24,6 +24,7 @@ import socket  import struct  import subprocess  import sys +import tempfile  import traceback  import xml.etree.ElementTree  import zlib @@ -91,11 +92,9 @@ except ImportError:      compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')  try: -    from urllib.parse import parse_qs as compat_parse_qs -except ImportError: # Python 2 -    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. -    # Python 2's version is apparently totally broken -    def _unquote(string, encoding='utf-8', errors='replace'): +    from urllib.parse import unquote as compat_urllib_parse_unquote +except ImportError: +    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):          if string == '':              return string          res = string.split('%') @@ -130,6 +129,13 @@ except ImportError: # Python 2              string += pct_sequence.decode(encoding, errors)          return string + +try: +    from urllib.parse import parse_qs as compat_parse_qs +except ImportError: # Python 2 +    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. +    # Python 2's version is apparently totally broken +      def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,                  encoding='utf-8', errors='replace'):          qs, _coerce_result = qs, unicode @@ -149,10 +155,12 @@ except ImportError: # Python 2                      continue              if len(nv[1]) or keep_blank_values:                  name = nv[0].replace('+', ' ') -                name = _unquote(name, encoding=encoding, errors=errors) +                name = compat_urllib_parse_unquote( +                    name, encoding=encoding, errors=errors)                  name = _coerce_result(name)                  value = nv[1].replace('+', ' ') -                value = _unquote(value, encoding=encoding, errors=errors) +                value = compat_urllib_parse_unquote( +                    value, encoding=encoding, errors=errors)                  value = _coerce_result(value)                  r.append((name, value))          return r @@ -221,22 +229,46 @@ else:          assert type(s) == type(u'')          print(s) -# In Python 2.x, json.dump expects a bytestream. -# In Python 3.x, it writes to a character stream -if sys.version_info < (3,0): -    def write_json_file(obj, fn): -        with open(fn, 'wb') as f: -            json.dump(obj, f) -else: -    def write_json_file(obj, fn): -        with open(fn, 'w', encoding='utf-8') as f: -            json.dump(obj, f) -if sys.version_info >= (2,7): +def write_json_file(obj, fn): +    """ Encode obj as JSON and write it to fn, atomically """ + +    args = { +        'suffix': '.tmp', +        'prefix': os.path.basename(fn) + '.', +        'dir': os.path.dirname(fn), +        'delete': False, +    } + +    # In Python 2.x, json.dump expects a bytestream. +    # In Python 3.x, it writes to a character stream +    if sys.version_info < (3, 0): +        args['mode'] = 'wb' +    else: +        args.update({ +            'mode': 'w', +            'encoding': 'utf-8', +        }) + +    tf = tempfile.NamedTemporaryFile(**args) + +    try: +        with tf: +            json.dump(obj, tf) +        os.rename(tf.name, fn) +    except: +        try: +            os.remove(tf.name) +        except OSError: +            pass +        raise + + +if sys.version_info >= (2, 7):      def find_xpath_attr(node, xpath, key, val):          """ Find the xpath xpath[@key=val] """ -        assert re.match(r'^[a-zA-Z]+$', key) -        assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val) +        assert re.match(r'^[a-zA-Z-]+$', key) +        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)          expr = xpath + u"[@%s='%s']" % (key, val)          return node.find(expr)  else: @@ -775,7 +807,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):      https_response = http_response -def parse_iso8601(date_str): +def parse_iso8601(date_str, delimiter='T'):      """ Return a UNIX timestamp from the given date """      if date_str is None: @@ -795,8 +827,8 @@ def parse_iso8601(date_str):              timezone = datetime.timedelta(                  hours=sign * int(m.group('hours')),                  minutes=sign * int(m.group('minutes'))) - -    dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone +    date_format =  '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) +    dt = datetime.datetime.strptime(date_str, date_format) - timezone      return calendar.timegm(dt.timetuple()) @@ -816,7 +848,11 @@ def unified_strdate(date_str):          '%d %b %Y',          '%B %d %Y',          '%b %d %Y', +        '%b %dst %Y %I:%M%p', +        '%b %dnd %Y %I:%M%p', +        '%b %dth %Y %I:%M%p',          '%Y-%m-%d', +        '%Y/%m/%d',          '%d.%m.%Y',          '%d/%m/%Y',          '%Y/%m/%d %H:%M:%S', @@ -842,6 +878,8 @@ def unified_strdate(date_str):      return upload_date  def determine_ext(url, default_ext=u'unknown_video'): +    if url is None: +        return default_ext      guess = url.partition(u'?')[0].rpartition(u'.')[2]      if re.match(r'^[A-Za-z0-9]+$', guess):          return guess @@ -1190,11 +1228,6 @@ def format_bytes(bytes):      return u'%.2f%s' % (converted, suffix) -def str_to_int(int_str): -    int_str = re.sub(r'[,\.]', u'', int_str) -    return int(int_str) - -  def get_term_width():      columns = os.environ.get('COLUMNS', None)      if columns: @@ -1262,15 +1295,28 @@ class HEADRequest(compat_urllib_request.Request):          return "HEAD" -def int_or_none(v, scale=1, default=None, get_attr=None): +def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):      if get_attr:          if v is not None:              v = getattr(v, get_attr, None) -    return default if v is None else (int(v) // scale) +    if v == '': +        v = None +    return default if v is None else (int(v) * invscale // scale) + + +def str_or_none(v, default=None): +    return default if v is None else compat_str(v) + + +def str_to_int(int_str): +    if int_str is None: +        return None +    int_str = re.sub(r'[,\.]', u'', int_str) +    return int(int_str) -def float_or_none(v, scale=1, default=None): -    return default if v is None else (float(v) / scale) +def float_or_none(v, scale=1, invscale=1, default=None): +    return default if v is None else (float(v) * invscale / scale)  def parse_duration(s): @@ -1425,7 +1471,35 @@ US_RATINGS = {  def strip_jsonp(code): -    return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code) +    return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code) + + +def js_to_json(code): +    def fix_kv(m): +        key = m.group(2) +        if key.startswith("'"): +            assert key.endswith("'") +            assert '"' not in key +            key = '"%s"' % key[1:-1] +        elif not key.startswith('"'): +            key = '"%s"' % key + +        value = m.group(4) +        if value.startswith("'"): +            assert value.endswith("'") +            assert '"' not in value +            value = '"%s"' % value[1:-1] + +        return m.group(1) + key + m.group(3) + value + +    res = re.sub(r'''(?x) +            ([{,]\s*) +            ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+) +            (:\s*) +            ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'|\[|\{) +        ''', fix_kv, code) +    res = re.sub(r',(\s*\])', lambda m: m.group(1), res) +    return res  def qualities(quality_ids): | 
