diff options
Diffstat (limited to 'youtube_dl/utils.py')
| -rw-r--r-- | youtube_dl/utils.py | 60 | 
1 files changed, 43 insertions, 17 deletions
| diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4d3cbac74..bbe554a65 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -41,6 +41,7 @@ from .compat import (      compat_urllib_parse_urlparse,      compat_urllib_request,      compat_urlparse, +    compat_WINFUNCTYPE,      shlex_quote,  ) @@ -166,7 +167,7 @@ def xpath_text(node, xpath, name=None, fatal=False):          xpath = xpath.encode('ascii')      n = node.find(xpath) -    if n is None: +    if n is None or n.text is None:          if fatal:              name = xpath if name is None else name              raise ExtractorError('Could not find XML element %s' % name) @@ -644,17 +645,19 @@ def parse_iso8601(date_str, delimiter='T'):      return calendar.timegm(dt.timetuple()) -def unified_strdate(date_str): +def unified_strdate(date_str, day_first=True):      """Return a string with the date in the format YYYYMMDD"""      if date_str is None:          return None -      upload_date = None      # Replace commas      date_str = date_str.replace(',', ' ')      # %z (UTC offset) is only supported in python>=3.2      date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) +    # Remove AM/PM + timezone +    date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str) +      format_expressions = [          '%d %B %Y',          '%d %b %Y', @@ -669,7 +672,6 @@ def unified_strdate(date_str):          '%d/%m/%Y',          '%d/%m/%y',          '%Y/%m/%d %H:%M:%S', -        '%d/%m/%Y %H:%M:%S',          '%Y-%m-%d %H:%M:%S',          '%Y-%m-%d %H:%M:%S.%f',          '%d.%m.%Y %H:%M', @@ -681,6 +683,14 @@ def unified_strdate(date_str):          '%Y-%m-%dT%H:%M:%S.%f',          '%Y-%m-%dT%H:%M',      ] +    if day_first: +        format_expressions.extend([ +            '%d/%m/%Y %H:%M:%S', +        ]) +    else: +        format_expressions.extend([ +            '%m/%d/%Y %H:%M:%S', +        ])      for expression in format_expressions:          try:              upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') @@ -712,8 +722,10 @@ def date_from_str(date_str):      Return a datetime object from a string in the format YYYYMMDD or      (now|today)[+-][0-9](day|week|month|year)(s)?"""      today = datetime.date.today() -    if date_str == 'now'or date_str == 'today': +    if date_str in ('now', 'today'):          return today +    if date_str == 'yesterday': +        return today - datetime.timedelta(days=1)      match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)      if match is not None:          sign = match.group('sign') @@ -806,21 +818,21 @@ def _windows_write_string(s, out):      if fileno not in WIN_OUTPUT_IDS:          return False -    GetStdHandle = ctypes.WINFUNCTYPE( +    GetStdHandle = compat_WINFUNCTYPE(          ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(          ("GetStdHandle", ctypes.windll.kernel32))      h = GetStdHandle(WIN_OUTPUT_IDS[fileno]) -    WriteConsoleW = ctypes.WINFUNCTYPE( +    WriteConsoleW = compat_WINFUNCTYPE(          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,          ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),          ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))      written = ctypes.wintypes.DWORD(0) -    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32)) +    GetFileType = compat_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))      FILE_TYPE_CHAR = 0x0002      FILE_TYPE_REMOTE = 0x8000 -    GetConsoleMode = ctypes.WINFUNCTYPE( +    GetConsoleMode = compat_WINFUNCTYPE(          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,          ctypes.POINTER(ctypes.wintypes.DWORD))(          ("GetConsoleMode", ctypes.windll.kernel32)) @@ -1024,7 +1036,7 @@ def smuggle_url(url, data):  def unsmuggle_url(smug_url, default=None): -    if not '#__youtubedl_smuggle' in smug_url: +    if '#__youtubedl_smuggle' not in smug_url:          return smug_url, default      url, _, sdata = smug_url.rpartition('#')      jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0] @@ -1090,11 +1102,14 @@ def parse_filesize(s):      }      units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE) -    m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s) +    m = re.match( +        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)      if not m:          return None -    return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')]) +    num_str = m.group('num').replace(',', '.') +    mult = _UNIT_TABLE[m.group('unit')] +    return int(float(num_str) * mult)  def get_term_width(): @@ -1203,18 +1218,29 @@ def parse_duration(s):      m = re.match(          r'''(?ix)T? +        (?: +            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*| +            (?P<only_hours>[0-9.]+)\s*(?:hours?)| +              (?:                  (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?                  (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*              )? -            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s) +            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)? +        )$''', s)      if not m:          return None -    res = int(m.group('secs')) +    res = 0 +    if m.group('only_mins'): +        return float_or_none(m.group('only_mins'), invscale=60) +    if m.group('only_hours'): +        return float_or_none(m.group('only_hours'), invscale=60 * 60) +    if m.group('secs'): +        res += int(m.group('secs'))      if m.group('mins'):          res += int(m.group('mins')) * 60 -        if m.group('hours'): -            res += int(m.group('hours')) * 60 * 60 +    if m.group('hours'): +        res += int(m.group('hours')) * 60 * 60      if m.group('ms'):          res += float(m.group('ms'))      return res @@ -1488,7 +1514,7 @@ def limit_length(s, length):  def version_tuple(v): -    return [int(e) for e in v.split('.')] +    return tuple(int(e) for e in re.split(r'[-.]', v))  def is_outdated_version(version, limit, assume_new=True): | 
