diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/__init__.py | 13 | ||||
-rw-r--r-- | youtube_dl/utils.py | 17 |
2 files changed, 23 insertions, 7 deletions
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 84f29a1a5..2aaafd37a 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -71,6 +71,7 @@ from .utils import (
     get_cachedir,
     MaxDownloadsReached,
     preferredencoding,
+    read_batch_urls,
     SameFileError,
     setproctitle,
     std_headers,
@@ -552,21 +553,19 @@ def _real_main(argv=None):
         sys.exit(0)
 
     # Batch file verification
-    batchurls = []
+    batch_urls = []
     if opts.batchfile is not None:
         try:
             if opts.batchfile == '-':
                 batchfd = sys.stdin
             else:
-                batchfd = open(opts.batchfile, 'r')
-            batchurls = batchfd.readlines()
-            batchurls = [x.strip() for x in batchurls]
-            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
+                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
+            batch_urls = read_batch_urls(batchfd)
             if opts.verbose:
-                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+                write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
         except IOError:
             sys.exit(u'ERROR: batch file could not be read')
-    all_urls = batchurls + args
+    all_urls = batch_urls + args
     all_urls = [url.strip() for url in all_urls]
     _enc = preferredencoding()
     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 25e40a837..0c482631a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import contextlib
 import ctypes
 import datetime
 import email.utils
@@ -1245,3 +1246,19 @@ except TypeError:
 else:
     struct_pack = struct.pack
     struct_unpack = struct.unpack
+
+
+def read_batch_urls(batch_fd):
+    def fixup(url):
+        if not isinstance(url, compat_str):
+            url = url.decode('utf-8', 'replace')
+        BOM_UTF8 = u'\xef\xbb\xbf'
+        if url.startswith(BOM_UTF8):
+            url = url[len(BOM_UTF8):]
+        url = url.strip()
+        if url.startswith(('#', ';', ']')):
+            return False
+        return url
+
+    with contextlib.closing(batch_fd) as fd:
+        return [url for url in map(fixup, fd) if url]