diff options
Diffstat (limited to 'youtube_dl/utils.py')
| -rw-r--r-- | youtube_dl/utils.py | 47 | 
1 files changed, 38 insertions, 9 deletions
| diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4e00317f1..a5f584ec5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1696,6 +1696,17 @@ MONTH_NAMES = {          'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],  } +# Timezone names for RFC2822 obs-zone +# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42 +TIMEZONE_NAMES = { +    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0, +    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada) +    'EST': -5, 'EDT': -4,  # Eastern +    'CST': -6, 'CDT': -5,  # Central +    'MST': -7, 'MDT': -6,  # Mountain +    'PST': -8, 'PDT': -7   # Pacific +} +  KNOWN_EXTENSIONS = (      'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',      'flv', 'f4v', 'f4a', 'f4b', @@ -1735,12 +1746,17 @@ DATE_FORMATS = (      '%b %dth %Y %I:%M',      '%Y %m %d',      '%Y-%m-%d', +    '%Y.%m.%d.',      '%Y/%m/%d',      '%Y/%m/%d %H:%M',      '%Y/%m/%d %H:%M:%S', +    '%Y%m%d%H%M', +    '%Y%m%d%H%M%S', +    '%Y%m%d',      '%Y-%m-%d %H:%M',      '%Y-%m-%d %H:%M:%S',      '%Y-%m-%d %H:%M:%S.%f', +    '%Y-%m-%d %H:%M:%S:%f',      '%d.%m.%Y %H:%M',      '%d.%m.%Y %H.%M',      '%Y-%m-%dT%H:%M:%SZ', @@ -1753,6 +1769,7 @@ DATE_FORMATS = (      '%b %d %Y at %H:%M:%S',      '%B %d %Y at %H:%M',      '%B %d %Y at %H:%M:%S', +    '%H:%M %d-%b-%Y',  )  DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) @@ -1763,6 +1780,7 @@ DATE_FORMATS_DAY_FIRST.extend([      '%d/%m/%Y',      '%d/%m/%y',      '%d/%m/%Y %H:%M:%S', +    '%d-%m-%Y %H:%M',  ])  DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) @@ -2966,10 +2984,22 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):  def extract_timezone(date_str):      m = re.search( -        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', -        date_str) +        r'''(?x) +            ^.{8,}?                                              # >=8 char non-TZ prefix, if present +            (?P<tz>Z|                                            # just the UTC Z, or +                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or +                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits +                   [ ]?                                          # optional space +                (?P<sign>\+|-)                                   # +/- +                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm +            $) +        ''', date_str)      if not m: -        timezone = datetime.timedelta() +        m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str) +        timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip()) +        if timezone is not None: +            date_str = date_str[:-len(m.group('tz'))] +        timezone = datetime.timedelta(hours=timezone or 0)      else:          date_str = date_str[:-len(m.group('tz'))]          if not m.group('sign'): @@ -3037,7 +3067,8 @@ def unified_timestamp(date_str, day_first=True):      if date_str is None:          return None -    date_str = re.sub(r'[,|]', '', date_str) +    date_str = re.sub(r'\s+', ' ', re.sub( +        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))      pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0      timezone, date_str = extract_timezone(date_str) @@ -3063,7 +3094,7 @@ def unified_timestamp(date_str, day_first=True):              pass      timetuple = email.utils.parsedate_tz(date_str)      if timetuple: -        return calendar.timegm(timetuple) + pm_delta * 3600 +        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()  def determine_ext(url, default_ext='unknown_video'): @@ -3673,13 +3704,11 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):      if get_attr:          if v is not None:              v = getattr(v, get_attr, None) -    if v == '': -        v = None -    if v is None: +    if v in (None, ''):          return default      try:          return int(v) * invscale // scale -    except (ValueError, TypeError): +    except (ValueError, TypeError, OverflowError):          return default | 
