diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-02-25 01:43:17 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-02-25 01:43:17 +0100 | 
| commit | 62e609ab771140b185e98ed085445d40b751cbfc (patch) | |
| tree | 9daee9760c5d77d8acc9bdef877ac8fbbeb07488 | |
| parent | f6acbdecf483513166287a41cba5eed928404dc2 (diff) | |
Ignore BOM in batch files (Fixes #2450)
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | test/test_utils.py | 11 |
| -rw-r--r-- | youtube_dl/__init__.py | 13 |
| -rw-r--r-- | youtube_dl/utils.py | 17 |
3 files changed, 34 insertions, 7 deletions
| diff --git a/test/test_utils.py b/test/test_utils.py index 84553b943..4e3c37fb4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  # Various small unit tests +import io  import xml.etree.ElementTree  #from youtube_dl.utils import htmlentity_transform @@ -21,6 +22,7 @@ from youtube_dl.utils import (      orderedSet,      PagedList,      parse_duration, +    read_batch_urls,      sanitize_filename,      shell_quote,      smuggle_url, @@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase):      def test_struct_unpack(self):          self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) +    def test_read_batch_urls(self): +        f = io.StringIO(u'''\xef\xbb\xbf foo +            bar\r +            baz +            # More after this line\r +            ; or after this +            bam''') +        self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam']) +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 84f29a1a5..2aaafd37a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -71,6 +71,7 @@ from .utils import (      get_cachedir,      MaxDownloadsReached,      preferredencoding, +    read_batch_urls,      SameFileError,      setproctitle,      std_headers, @@ -552,21 +553,19 @@ def _real_main(argv=None):          sys.exit(0)      # Batch file verification -    batchurls = [] +    batch_urls = []      if opts.batchfile is not None:          try:              if opts.batchfile == '-':                  batchfd = sys.stdin              else: -                batchfd = open(opts.batchfile, 'r') -            batchurls = batchfd.readlines() -            batchurls = [x.strip() for x in batchurls] -            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] +                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore') 
+            batch_urls = read_batch_urls(batchfd)              if opts.verbose: -                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') +                write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')          except IOError:              sys.exit(u'ERROR: batch file could not be read') -    all_urls = batchurls + args +    all_urls = batch_urls + args      all_urls = [url.strip() for url in all_urls]      _enc = preferredencoding()      all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 25e40a837..0c482631a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1,6 +1,7 @@  #!/usr/bin/env python  # -*- coding: utf-8 -*- +import contextlib  import ctypes  import datetime  import email.utils @@ -1245,3 +1246,19 @@ except TypeError:  else:      struct_pack = struct.pack      struct_unpack = struct.unpack + + +def read_batch_urls(batch_fd): +    def fixup(url): +        if not isinstance(url, compat_str): +            url = url.decode('utf-8', 'replace') +        BOM_UTF8 = u'\xef\xbb\xbf' +        if url.startswith(BOM_UTF8): +            url = url[len(BOM_UTF8):] +        url = url.strip() +        if url.startswith(('#', ';', ']')): +            return False +        return url + +    with contextlib.closing(batch_fd) as fd: +        return [url for url in map(fixup, fd) if url] | 
