diff options
| author | Sergey M․ <dstftw@gmail.com> | 2018-02-19 22:50:23 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2018-02-19 22:50:23 +0700 | 
| commit | befa4708fd2165b85d04002c3845adf191d34302 (patch) | |
| tree | be0eaae66b9f4aa11100d12c6c0c6a1e0d18851b | |
| parent | 90830004c893e2d5f0643c05af064cfc7a3b579e (diff) | |
[utils] Fixup some common URL's typos in sanitize_url (closes #15649)
| -rw-r--r-- | test/test_utils.py | 7 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 18 | 
2 files changed, 22 insertions, 3 deletions
```diff
diff --git a/test/test_utils.py b/test/test_utils.py
index fdf6031f7..d8d257d1d 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -57,6 +57,7 @@ from youtube_dl.utils import (
     read_batch_urls,
     sanitize_filename,
     sanitize_path,
+    sanitize_url,
     expand_path,
     prepend_extension,
     replace_extension,
@@ -219,6 +220,12 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_path('./abc'), 'abc')
         self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 
+    def test_sanitize_url(self):
+        self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
+        self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
+        self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
+        self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+
     def test_expand_path(self):
         def env(var):
             return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 7f24cbb04..af639a124 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -538,10 +538,22 @@ def sanitize_path(s):
     return os.path.join(*sanitized_path)
 
 
-# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
-# unwanted failures due to missing protocol
 def sanitize_url(url):
-    return 'http:%s' % url if url.startswith('//') else url
+    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
+    # the number of unwanted failures due to missing protocol
+    if url.startswith('//'):
+        return 'http:%s' % url
+    # Fix some common typos seen so far
+    COMMON_TYPOS = (
+        # https://github.com/rg3/youtube-dl/issues/15649
+        (r'^httpss://', r'https://'),
+        # https://bx1.be/lives/direct-tv/
+        (r'^rmtp([es]?)://', r'rtmp\1://'),
+    )
+    for mistake, fixup in COMMON_TYPOS:
+        if re.match(mistake, url):
+            return re.sub(mistake, fixup, url)
+    return url
 
 
 def sanitized_Request(url, *args, **kwargs):
```
