aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/YoutubeDL.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/YoutubeDL.py')
-rwxr-xr-xyoutube_dl/YoutubeDL.py287
1 files changed, 222 insertions, 65 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 4a9610355..4cc3ec2fb 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -7,6 +7,7 @@ import collections
import datetime
import errno
import io
+import itertools
import json
import locale
import os
@@ -22,12 +23,16 @@ import traceback
if os.name == 'nt':
import ctypes
-from .utils import (
+from .compat import (
compat_cookiejar,
+ compat_expanduser,
compat_http_client,
+ compat_kwargs,
compat_str,
compat_urllib_error,
compat_urllib_request,
+)
+from .utils import (
escape_url,
ContentTooShortError,
date_from_str,
@@ -57,11 +62,19 @@ from .utils import (
write_string,
YoutubeDLHandler,
prepend_extension,
+ args_to_str,
+ age_restricted,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractors
from .downloader import get_suitable_downloader
-from .postprocessor import FFmpegMergerPP
+from .downloader.rtmp import rtmpdump_version
+from .postprocessor import (
+ FFmpegFixupStretchedPP,
+ FFmpegMergerPP,
+ FFmpegPostProcessor,
+ get_postprocessor,
+)
from .version import __version__
@@ -107,8 +120,10 @@ class YoutubeDL(object):
forcefilename: Force printing final filename.
forceduration: Force printing duration.
forcejson: Force printing info_dict as JSON.
+ dump_single_json: Force printing the info_dict of the whole playlist
+ (or video) as a single JSON line.
simulate: Do not download the video files.
- format: Video format code.
+ format: Video format code. See options.py for more information.
format_limit: Highest quality format to try.
outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names
@@ -116,6 +131,7 @@ class YoutubeDL(object):
nooverwrites: Prevent overwriting files.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
+ playlistreverse: Download playlist items in reverse order.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance.
@@ -165,6 +181,37 @@ class YoutubeDL(object):
'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified.
extract_flat: Do not resolve URLs, return the immediate result.
+ Pass in 'in_playlist' to only show this behavior for
+ playlist items.
+ postprocessors: A list of dictionaries, each with an entry
+ * key: The name of the postprocessor. See
+ youtube_dl/postprocessor/__init__.py for a list.
+ as well as any further keyword arguments for the
+ postprocessor.
+ progress_hooks: A list of functions that get called on download
+ progress, with a dictionary with the entries
+ * filename: The final filename
+ * status: One of "downloading" and "finished"
+
+ The dict may also have some of the following entries:
+
+ * downloaded_bytes: Bytes on disk
+ * total_bytes: Size of the whole file, None if unknown
+ * tmpfilename: The filename we're currently writing to
+ * eta: The estimated time in seconds, None if unknown
+ * speed: The download speed in bytes/second, None if
+ unknown
+
+ Progress hooks are guaranteed to be called at least once
+ (with status "finished") if the download is successful.
+ merge_output_format: Extension to use when merging formats.
+ fixup: Automatically correct known faults of the file.
+ One of:
+ - "never": do nothing
+ - "warn": only emit a warning
+ - "detect_or_warn": check whether we can do anything
+ about it, warn otherwise
+
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@@ -184,7 +231,7 @@ class YoutubeDL(object):
_num_downloads = None
_screen_file = None
- def __init__(self, params=None):
+ def __init__(self, params=None, auto_init=True):
"""Create a FileDownloader object with the given options."""
if params is None:
params = {}
@@ -228,11 +275,11 @@ class YoutubeDL(object):
if (sys.version_info >= (3,) and sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
- and not params['restrictfilenames']):
+ and not params.get('restrictfilenames', False)):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
- 'cannot encode all charactes. '
+ 'cannot encode all characters. '
'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
@@ -241,6 +288,36 @@ class YoutubeDL(object):
self._setup_opener()
+ if auto_init:
+ self.print_debug_header()
+ self.add_default_info_extractors()
+
+ for pp_def_raw in self.params.get('postprocessors', []):
+ pp_class = get_postprocessor(pp_def_raw['key'])
+ pp_def = dict(pp_def_raw)
+ del pp_def['key']
+ pp = pp_class(self, **compat_kwargs(pp_def))
+ self.add_post_processor(pp)
+
+ for ph in self.params.get('progress_hooks', []):
+ self.add_progress_hook(ph)
+
+ def warn_if_short_id(self, argv):
+ # short YouTube ID starting with dash?
+ idxs = [
+ i for i, a in enumerate(argv)
+ if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
+ if idxs:
+ correct_argv = (
+ ['youtube-dl'] +
+ [a for i, a in enumerate(argv) if i not in idxs] +
+ ['--'] + [argv[i] for i in idxs]
+ )
+ self.report_warning(
+ 'Long argument string detected. '
+ 'Use -- to separate parameters and URLs, like this:\n%s\n' %
+ args_to_str(correct_argv))
+
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
@@ -285,7 +362,7 @@ class YoutubeDL(object):
self._output_process.stdin.write((message + '\n').encode('utf-8'))
self._output_process.stdin.flush()
res = ''.join(self._output_channel.readline().decode('utf-8')
- for _ in range(line_count))
+ for _ in range(line_count))
return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
@@ -447,7 +524,7 @@ class YoutubeDL(object):
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
- tmpl = os.path.expanduser(outtmpl)
+ tmpl = compat_expanduser(outtmpl)
filename = tmpl % template_dict
return filename
except ValueError as err:
@@ -482,13 +559,8 @@ class YoutubeDL(object):
max_views = self.params.get('max_views')
if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
- age_limit = self.params.get('age_limit')
- if age_limit is not None:
- actual_age_limit = info_dict.get('age_limit')
- if actual_age_limit is None:
- actual_age_limit = 0
- if age_limit < actual_age_limit:
- return 'Skipping "' + title + '" because it is age restricted'
+ if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+ return 'Skipping "%s" because it is age restricted' % title
if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title
return None
@@ -522,7 +594,7 @@ class YoutubeDL(object):
try:
ie_result = ie.extract(url)
- if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+ if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
break
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
@@ -535,7 +607,7 @@ class YoutubeDL(object):
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
- except ExtractorError as de: # An error we somewhat expected
+ except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
break
except MaxDownloadsReached:
@@ -568,8 +640,12 @@ class YoutubeDL(object):
result_type = ie_result.get('_type', 'video')
- if self.params.get('extract_flat', False):
- if result_type in ('url', 'url_transparent'):
+ if result_type in ('url', 'url_transparent'):
+ extract_flat = self.params.get('extract_flat', False)
+ if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
+ extract_flat is True):
+ if self.params.get('forcejson', False):
+ self.to_stdout(json.dumps(ie_result))
return ie_result
if result_type == 'video':
@@ -588,27 +664,19 @@ class YoutubeDL(object):
ie_result['url'], ie_key=ie_result.get('ie_key'),
extra_info=extra_info, download=False, process=False)
- def make_result(embedded_info):
- new_result = ie_result.copy()
- for f in ('_type', 'url', 'ext', 'player_url', 'formats',
- 'entries', 'ie_key', 'duration',
- 'subtitles', 'annotations', 'format',
- 'thumbnail', 'thumbnails'):
- if f in new_result:
- del new_result[f]
- if f in embedded_info:
- new_result[f] = embedded_info[f]
- return new_result
- new_result = make_result(info)
+ force_properties = dict(
+ (k, v) for k, v in ie_result.items() if v is not None)
+ for f in ('_type', 'url'):
+ if f in force_properties:
+ del force_properties[f]
+ new_result = info.copy()
+ new_result.update(force_properties)
assert new_result.get('_type') != 'url_transparent'
- if new_result.get('_type') == 'compat_list':
- new_result['entries'] = [
- make_result(e) for e in new_result['entries']]
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
- elif result_type == 'playlist':
+ elif result_type == 'playlist' or result_type == 'multi_video':
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen('[download] Downloading playlist: %s' % playlist)
@@ -621,27 +689,39 @@ class YoutubeDL(object):
if playlistend == -1:
playlistend = None
- if isinstance(ie_result['entries'], list):
- n_all_entries = len(ie_result['entries'])
- entries = ie_result['entries'][playliststart:playlistend]
+ ie_entries = ie_result['entries']
+ if isinstance(ie_entries, list):
+ n_all_entries = len(ie_entries)
+ entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
- else:
- assert isinstance(ie_result['entries'], PagedList)
- entries = ie_result['entries'].getslice(
+ elif isinstance(ie_entries, PagedList):
+ entries = ie_entries.getslice(
playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
+ else: # iterable
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
+ n_entries = len(entries)
+ self.to_screen(
+ "[%s] playlist %s: Downloading %d videos" %
+ (ie_result['extractor'], playlist, n_entries))
+
+ if self.params.get('playlistreverse', False):
+ entries = entries[::-1]
for i, entry in enumerate(entries, 1):
- self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
+ self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
extra = {
'n_entries': n_entries,
'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
'playlist_index': i + playliststart,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
@@ -661,14 +741,20 @@ class YoutubeDL(object):
ie_result['entries'] = playlist_results
return ie_result
elif result_type == 'compat_list':
+ self.report_warning(
+ 'Extractor %s returned a compat_list result. '
+ 'It needs to be updated.' % ie_result.get('extractor'))
+
def _fixup(r):
- self.add_extra_info(r,
+ self.add_extra_info(
+ r,
{
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
- })
+ }
+ )
return r
ie_result['entries'] = [
self.process_ie_result(_fixup(r), download, extra_info)
@@ -708,7 +794,7 @@ class YoutubeDL(object):
if video_formats:
return video_formats[0]
else:
- extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
+ extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
if format_spec in extensions:
filter_f = lambda f: f['ext'] == format_spec
else:
@@ -746,6 +832,10 @@ class YoutubeDL(object):
info_dict['display_id'] = info_dict['id']
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
+ # Working around negative timestamps in Windows
+ # (see http://bugs.python.org/issue1646728)
+ if info_dict['timestamp'] < 0 and os.name == 'nt':
+ info_dict['timestamp'] = 0
upload_date = datetime.datetime.utcfromtimestamp(
info_dict['timestamp'])
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
@@ -818,12 +908,33 @@ class YoutubeDL(object):
# Two formats have been requested like '137+139'
format_1, format_2 = rf.split('+')
formats_info = (self.select_format(format_1, formats),
- self.select_format(format_2, formats))
+ self.select_format(format_2, formats))
if all(formats_info):
+ # The first format must contain the video and the
+ # second the audio
+ if formats_info[0].get('vcodec') == 'none':
+ self.report_error('The first format must '
+ 'contain the video, try using '
+ '"-f %s+%s"' % (format_2, format_1))
+ return
+ output_ext = (
+ formats_info[0]['ext']
+ if self.params.get('merge_output_format') is None
+ else self.params['merge_output_format'])
selected_format = {
'requested_formats': formats_info,
'format': rf,
'ext': formats_info[0]['ext'],
+ 'width': formats_info[0].get('width'),
+ 'height': formats_info[0].get('height'),
+ 'resolution': formats_info[0].get('resolution'),
+ 'fps': formats_info[0].get('fps'),
+ 'vcodec': formats_info[0].get('vcodec'),
+ 'vbr': formats_info[0].get('vbr'),
+ 'stretched_ratio': formats_info[0].get('stretched_ratio'),
+ 'acodec': formats_info[1].get('acodec'),
+ 'abr': formats_info[1].get('abr'),
+ 'ext': output_ext,
}
else:
selected_format = None
@@ -882,8 +993,12 @@ class YoutubeDL(object):
if self.params.get('forceid', False):
self.to_stdout(info_dict['id'])
if self.params.get('forceurl', False):
- # For RTMP URLs, also include the playpath
- self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+ if info_dict.get('requested_formats') is not None:
+ for f in info_dict['requested_formats']:
+ self.to_stdout(f['url'] + f.get('play_path', ''))
+ else:
+ # For RTMP URLs, also include the playpath
+ self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
self.to_stdout(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
@@ -897,6 +1012,8 @@ class YoutubeDL(object):
if self.params.get('forcejson', False):
info_dict['_filename'] = filename
self.to_stdout(json.dumps(info_dict))
+ if self.params.get('dump_single_json', False):
+ info_dict['_filename'] = filename
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
@@ -917,13 +1034,13 @@ class YoutubeDL(object):
descfn = filename + '.description'
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
self.to_screen('[info] Video description is already present')
+ elif info_dict.get('description') is None:
+ self.report_warning('There\'s no description to write.')
else:
try:
self.to_screen('[info] Writing video description to: ' + descfn)
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(info_dict['description'])
- except (KeyError, TypeError):
- self.report_warning('There\'s no description to write.')
except (OSError, IOError):
self.report_error('Cannot write description file ' + descfn)
return
@@ -962,7 +1079,7 @@ class YoutubeDL(object):
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
- subfile.write(sub)
+ subfile.write(sub)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
@@ -974,7 +1091,7 @@ class YoutubeDL(object):
else:
self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
try:
- write_json_file(info_dict, encodeFilename(infofn))
+ write_json_file(info_dict, infofn)
except (OSError, IOError):
self.report_error('Cannot write metadata to JSON file ' + infofn)
return
@@ -994,10 +1111,10 @@ class YoutubeDL(object):
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
- (info_dict['extractor'], info_dict['id'], thumb_filename))
+ (info_dict['extractor'], info_dict['id'], thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download thumbnail "%s": %s' %
- (info_dict['thumbnail'], compat_str(err)))
+ (info_dict['thumbnail'], compat_str(err)))
if not self.params.get('skip_download', False):
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
@@ -1015,11 +1132,11 @@ class YoutubeDL(object):
downloaded = []
success = True
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
- if not merger._get_executable():
+ if not merger._executable:
postprocessors = []
self.report_warning('You have requested multiple '
- 'formats but ffmpeg or avconv are not installed.'
- ' The formats won\'t be merged')
+ 'formats but ffmpeg or avconv are not installed.'
+ ' The formats won\'t be merged')
else:
postprocessors = [merger]
for f in info_dict['requested_formats']:
@@ -1045,13 +1162,33 @@ class YoutubeDL(object):
return
if success:
+ # Fixup content
+ stretched_ratio = info_dict.get('stretched_ratio')
+ if stretched_ratio is not None and stretched_ratio != 1:
+ fixup_policy = self.params.get('fixup')
+ if fixup_policy is None:
+ fixup_policy = 'detect_or_warn'
+ if fixup_policy == 'warn':
+ self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
+ info_dict['id'], stretched_ratio))
+ elif fixup_policy == 'detect_or_warn':
+ stretched_pp = FFmpegFixupStretchedPP(self)
+ if stretched_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(stretched_pp)
+ else:
+ self.report_warning(
+ '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
+ info_dict['id'], stretched_ratio))
+ else:
+ assert fixup_policy == 'ignore'
+
try:
self.post_process(filename, info_dict)
except (PostProcessingError) as err:
self.report_error('postprocessing: %s' % str(err))
return
-
- self.record_download_archive(info_dict)
+ self.record_download_archive(info_dict)
def download(self, url_list):
"""Download a given list of URLs."""
@@ -1063,13 +1200,16 @@ class YoutubeDL(object):
for url in url_list:
try:
- #It also downloads the videos
- self.extract_info(url)
+ # It also downloads the videos
+ res = self.extract_info(url)
except UnavailableVideoError:
self.report_error('unable to download video')
except MaxDownloadsReached:
self.to_screen('[info] Maximum number of downloaded files reached.')
raise
+ else:
+ if self.params.get('dump_single_json', False):
+ self.to_stdout(json.dumps(res))
return self._download_retcode
@@ -1193,6 +1333,8 @@ class YoutubeDL(object):
res += 'video@'
if fdict.get('vbr') is not None:
res += '%4dk' % fdict['vbr']
+ if fdict.get('fps') is not None:
+ res += ', %sfps' % fdict['fps']
if fdict.get('acodec') is not None:
if res:
res += ', '
@@ -1230,7 +1372,9 @@ class YoutubeDL(object):
formats = info_dict.get('formats', [info_dict])
idlen = max(len('format code'),
max(len(f['format_id']) for f in formats))
- formats_s = [line(f, idlen) for f in formats]
+ formats_s = [
+ line(f, idlen) for f in formats
+ if f.get('preference') is None or f['preference'] >= -1000]
if len(formats) > 1:
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
@@ -1274,11 +1418,13 @@ class YoutubeDL(object):
self.report_warning(
'Your Python is broken! Update to a newer and supported version')
+ stdout_encoding = getattr(
+ sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
encoding_str = (
'[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
- sys.stdout.encoding,
+ stdout_encoding,
self.get_encoding()))
write_string(encoding_str, encoding=None)
@@ -1297,8 +1443,19 @@ class YoutubeDL(object):
sys.exc_clear()
except:
pass
- self._write_string('[debug] Python version %s - %s' %
- (platform.python_version(), platform_name()) + '\n')
+ self._write_string('[debug] Python version %s - %s\n' % (
+ platform.python_version(), platform_name()))
+
+ exe_versions = FFmpegPostProcessor.get_versions()
+ exe_versions['rtmpdump'] = rtmpdump_version()
+ exe_str = ', '.join(
+ '%s %s' % (exe, v)
+ for exe, v in sorted(exe_versions.items())
+ if v
+ )
+ if not exe_str:
+ exe_str = 'none'
+ self._write_string('[debug] exe versions: %s\n' % exe_str)
proxy_map = {}
for handler in self._opener.handlers: