aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/YoutubeDL.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/YoutubeDL.py')
-rwxr-xr-xyoutube_dl/YoutubeDL.py187
1 files changed, 154 insertions, 33 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 54e732943..b772f87f1 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -54,8 +54,10 @@ from .utils import (
PostProcessingError,
platform_name,
preferredencoding,
+ render_table,
SameFileError,
sanitize_filename,
+ std_headers,
subtitles_filename,
takewhile_inclusive,
UnavailableVideoError,
@@ -135,6 +137,7 @@ class YoutubeDL(object):
nooverwrites: Prevent overwriting files.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
+ playlist_items: Specific indices of playlist to download.
playlistreverse: Download playlist items in reverse order.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
@@ -144,6 +147,7 @@ class YoutubeDL(object):
writeinfojson: Write the video description to a .info.json file
writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file
+ write_all_thumbnails: Write all thumbnail formats to files
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatic subtitles to a file
allsubtitles: Downloads all the subtitles of the video
@@ -194,11 +198,12 @@ class YoutubeDL(object):
postprocessor.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
- * filename: The final filename
- * status: One of "downloading" and "finished"
-
- The dict may also have some of the following entries:
+ * status: One of "downloading" and "finished".
+ Check this first and ignore unknown values.
+ If status is one of "downloading" or "finished", the
+ following properties may also be present:
+ * filename: The final filename (always present)
* downloaded_bytes: Bytes on disk
* total_bytes: Size of the whole file, None if unknown
* tmpfilename: The filename we're currently writing to
@@ -220,12 +225,15 @@ class YoutubeDL(object):
youtube-dl servers for debugging.
sleep_interval: Number of seconds to sleep before each download.
external_downloader: Executable of the external downloader to call.
+ listformats: Print an overview of available video formats and exit.
+ list_thumbnails: Print a table of all thumbnails and exit.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
- noresizebuffer, retries, continuedl, noprogress, consoletitle
+ noresizebuffer, retries, continuedl, noprogress, consoletitle,
+ xattr_set_filesize.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
@@ -698,24 +706,51 @@ class YoutubeDL(object):
if playlistend == -1:
playlistend = None
+ playlistitems_str = self.params.get('playlist_items', None)
+ playlistitems = None
+ if playlistitems_str is not None:
+ def iter_playlistitems(format):
+ for string_segment in format.split(','):
+ if '-' in string_segment:
+ start, end = string_segment.split('-')
+ for item in range(int(start), int(end) + 1):
+ yield int(item)
+ else:
+ yield int(string_segment)
+ playlistitems = iter_playlistitems(playlistitems_str)
+
ie_entries = ie_result['entries']
if isinstance(ie_entries, list):
n_all_entries = len(ie_entries)
- entries = ie_entries[playliststart:playlistend]
+ if playlistitems:
+ entries = [ie_entries[i - 1] for i in playlistitems]
+ else:
+ entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
elif isinstance(ie_entries, PagedList):
- entries = ie_entries.getslice(
- playliststart, playlistend)
+ if playlistitems:
+ entries = []
+ for item in playlistitems:
+ entries.extend(ie_entries.getslice(
+ item - 1, item
+ ))
+ else:
+ entries = ie_entries.getslice(
+ playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
else: # iterable
- entries = list(itertools.islice(
- ie_entries, playliststart, playlistend))
+ if playlistitems:
+ entry_list = list(ie_entries)
+ entries = [entry_list[i - 1] for i in playlistitems]
+ else:
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
@@ -865,6 +900,42 @@ class YoutubeDL(object):
return matches[-1]
return None
+ def _calc_headers(self, info_dict):
+ res = std_headers.copy()
+
+ add_headers = info_dict.get('http_headers')
+ if add_headers:
+ res.update(add_headers)
+
+ cookies = self._calc_cookies(info_dict)
+ if cookies:
+ res['Cookie'] = cookies
+
+ return res
+
+ def _calc_cookies(self, info_dict):
+ class _PseudoRequest(object):
+ def __init__(self, url):
+ self.url = url
+ self.headers = {}
+ self.unverifiable = False
+
+ def add_unredirected_header(self, k, v):
+ self.headers[k] = v
+
+ def get_full_url(self):
+ return self.url
+
+ def is_unverifiable(self):
+ return self.unverifiable
+
+ def has_header(self, h):
+ return h in self.headers
+
+ pr = _PseudoRequest(info_dict['url'])
+ self.cookiejar.add_cookie_header(pr)
+ return pr.headers.get('Cookie')
+
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
@@ -879,9 +950,14 @@ class YoutubeDL(object):
info_dict['playlist_index'] = None
thumbnails = info_dict.get('thumbnails')
+ if thumbnails is None:
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ thumbnails = [{'url': thumbnail}]
if thumbnails:
thumbnails.sort(key=lambda t: (
- t.get('width'), t.get('height'), t.get('url')))
+ t.get('preference'), t.get('width'), t.get('height'),
+ t.get('id'), t.get('url')))
for t in thumbnails:
if 'width' in t and 'height' in t:
t['resolution'] = '%dx%d' % (t['width'], t['height'])
@@ -933,6 +1009,11 @@ class YoutubeDL(object):
# Automatically determine file extension if missing
if 'ext' not in format:
format['ext'] = determine_ext(format['url']).lower()
+ # Add HTTP headers, so that external programs can use them from the
+ # json output
+ full_format_info = info_dict.copy()
+ full_format_info.update(format)
+ format['http_headers'] = self._calc_headers(full_format_info)
format_limit = self.params.get('format_limit', None)
if format_limit:
@@ -948,9 +1029,12 @@ class YoutubeDL(object):
# element in the 'formats' field in info_dict is info_dict itself,
# wich can't be exported to json
info_dict['formats'] = formats
- if self.params.get('listformats', None):
+ if self.params.get('listformats'):
self.list_formats(info_dict)
return
+ if self.params.get('list_thumbnails'):
+ self.list_thumbnails(info_dict)
+ return
req_format = self.params.get('format')
if req_format is None:
@@ -1157,25 +1241,7 @@ class YoutubeDL(object):
self.report_error('Cannot write metadata to JSON file ' + infofn)
return
- if self.params.get('writethumbnail', False):
- if info_dict.get('thumbnail') is not None:
- thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
- thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
- self.to_screen('[%s] %s: Thumbnail is already present' %
- (info_dict['extractor'], info_dict['id']))
- else:
- self.to_screen('[%s] %s: Downloading thumbnail ...' %
- (info_dict['extractor'], info_dict['id']))
- try:
- uf = self.urlopen(info_dict['thumbnail'])
- with open(thumb_filename, 'wb') as thumbf:
- shutil.copyfileobj(uf, thumbf)
- self.to_screen('[%s] %s: Writing thumbnail to: %s' %
- (info_dict['extractor'], info_dict['id'], thumb_filename))
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self.report_warning('Unable to download thumbnail "%s": %s' %
- (info_dict['thumbnail'], compat_str(err)))
+ self._write_thumbnails(info_dict, filename)
if not self.params.get('skip_download', False):
try:
@@ -1186,6 +1252,7 @@ class YoutubeDL(object):
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)
+
if info_dict.get('requested_formats') is not None:
downloaded = []
success = True
@@ -1458,8 +1525,26 @@ class YoutubeDL(object):
header_line = line({
'format_id': 'format code', 'ext': 'extension',
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
- self.to_screen('[info] Available formats for %s:\n%s\n%s' %
- (info_dict['id'], header_line, '\n'.join(formats_s)))
+ self.to_screen(
+ '[info] Available formats for %s:\n%s\n%s' %
+ (info_dict['id'], header_line, '\n'.join(formats_s)))
+
+ def list_thumbnails(self, info_dict):
+ thumbnails = info_dict.get('thumbnails')
+ if not thumbnails:
+ tn_url = info_dict.get('thumbnail')
+ if tn_url:
+ thumbnails = [{'id': '0', 'url': tn_url}]
+ else:
+ self.to_screen(
+ '[info] No thumbnails present for %s' % info_dict['id'])
+ return
+
+ self.to_screen(
+ '[info] Thumbnails for %s:' % info_dict['id'])
+ self.to_screen(render_table(
+ ['ID', 'width', 'height', 'URL'],
+ [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def urlopen(self, req):
""" Start an HTTP download """
@@ -1605,3 +1690,39 @@ class YoutubeDL(object):
if encoding is None:
encoding = preferredencoding()
return encoding
+
+ def _write_thumbnails(self, info_dict, filename):
+ if self.params.get('writethumbnail', False):
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails:
+ thumbnails = [thumbnails[-1]]
+ elif self.params.get('write_all_thumbnails', False):
+ thumbnails = info_dict.get('thumbnails')
+ else:
+ return
+
+ if not thumbnails:
+ # No thumbnails present, so return immediately
+ return
+
+ for t in thumbnails:
+ thumb_ext = determine_ext(t['url'], 'jpg')
+ suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
+ thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
+ thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+ self.to_screen('[%s] %s: Thumbnail %sis already present' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ else:
+ self.to_screen('[%s] %s: Downloading thumbnail %s...' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ try:
+ uf = self.urlopen(t['url'])
+ with open(thumb_filename, 'wb') as thumbf:
+ shutil.copyfileobj(uf, thumbf)
+ self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self.report_warning('Unable to download thumbnail "%s": %s' %
+ (t['url'], compat_str(err)))