diff options
Diffstat (limited to 'youtube_dl/utils.py')
-rw-r--r-- | youtube_dl/utils.py | 34 |
1 files changed, 32 insertions, 2 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index daf94abd1..3536a5bd6 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -399,8 +399,9 @@ def formatSeconds(secs):
 def make_HTTPS_handler(params, **kwargs):
     opts_no_check_certificate = params.get('nocheckcertificate', False)
     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
-        context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
+        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
         if opts_no_check_certificate:
+            context.check_hostname = False
             context.verify_mode = ssl.CERT_NONE
         try:
             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
@@ -723,6 +724,7 @@ def unified_strdate(date_str, day_first=True):
         '%b %dst %Y %I:%M%p',
         '%b %dnd %Y %I:%M%p',
         '%b %dth %Y %I:%M%p',
+        '%Y %m %d',
         '%Y-%m-%d',
         '%Y/%m/%d',
         '%Y/%m/%d %H:%M:%S',
@@ -1277,7 +1279,7 @@ def parse_duration(s):
     s = s.strip()
 
     m = re.match(
-        r'''(?ix)T?
+        r'''(?ix)(?:P?T)?
         (?:
             (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
             (?P<only_hours>[0-9.]+)\s*(?:hours?)|
@@ -1612,6 +1614,14 @@ def urlhandle_detect_ext(url_handle):
     except AttributeError:  # Python < 3
         getheader = url_handle.info().getheader
 
+    cd = getheader('Content-Disposition')
+    if cd:
+        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+        if m:
+            e = determine_ext(m.group('filename'), default_ext=None)
+            if e:
+                return e
+
     return getheader('Content-Type').split("/")[1]
 
 
@@ -1623,3 +1633,23 @@ def age_restricted(content_limit, age_limit):
     if content_limit is None:
         return False  # Content available for everyone
     return age_limit < content_limit
+
+
+def is_html(first_bytes):
+    """ Detect whether a file contains HTML by examining its first bytes. """
+
+    BOMS = [
+        (b'\xef\xbb\xbf', 'utf-8'),
+        (b'\x00\x00\xfe\xff', 'utf-32-be'),
+        (b'\xff\xfe\x00\x00', 'utf-32-le'),
+        (b'\xff\xfe', 'utf-16-le'),
+        (b'\xfe\xff', 'utf-16-be'),
+    ]
+    for bom, enc in BOMS:
+        if first_bytes.startswith(bom):
+            s = first_bytes[len(bom):].decode(enc, 'replace')
+            break
+    else:
+        s = first_bytes.decode('utf-8', 'replace')
+
+    return re.match(r'^\s*<', s)