diff options
70 files changed, 5627 insertions, 787 deletions
diff --git a/.gitignore b/.gitignore index 824c0d24df..3ef77d0622 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ trace-dtrace.dtrace *-linux-user *-bsd-user libdis* +libhw libhw32 libhw64 libuser @@ -214,7 +214,7 @@ $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN) qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(tools-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y) -QEMULIBS=libhw32 libhw64 libuser libdis libdis-user +QEMULIBS=libhw libuser libdis libdis-user clean: # avoid old build problems by removing potentially incorrect old files diff --git a/Makefile.hw b/Makefile.hw index 59f5b48350..86f0bf40f4 100644 --- a/Makefile.hw +++ b/Makefile.hw @@ -2,7 +2,6 @@ include ../config-host.mak include ../config-all-devices.mak -include config.mak include $(SRC_PATH)/rules.mak .PHONY: all diff --git a/Makefile.objs b/Makefile.objs index 4412757309..b1f3e22547 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -42,7 +42,8 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o # block-obj-y is code used by both qemu system emulation and qemu-img block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o -block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o +block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o +block-obj-y += qemu-progress.o qemu-sockets.o uri.o block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y) block-obj-$(CONFIG_POSIX) += posix-aio-compat.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o @@ -59,7 +60,7 @@ endif # suppress *all* target specific code in case of system emulation, i.e. a # single QEMU executable should support all CPUs and machines. -common-obj-y = $(block-obj-y) blockdev.o +common-obj-y = $(block-obj-y) blockdev.o block/ common-obj-y += net.o net/ common-obj-y += qom/ common-obj-y += readline.o console.o cursor.o diff --git a/Makefile.target b/Makefile.target index d9d54b8dd0..4449444a0e 100644 --- a/Makefile.target +++ b/Makefile.target @@ -4,9 +4,6 @@ include ../config-host.mak include config-devices.mak include config-target.mak include $(SRC_PATH)/rules.mak -ifneq ($(HWDIR),) -include $(HWDIR)/config.mak -endif $(call set-vpath, $(SRC_PATH)) ifdef CONFIG_LINUX diff --git a/QMP/qemu-ga-client b/QMP/qemu-ga-client new file mode 100755 index 0000000000..46676c3750 --- /dev/null +++ b/QMP/qemu-ga-client @@ -0,0 +1,299 @@ +#!/usr/bin/python + +# QEMU Guest Agent Client +# +# Copyright (C) 2012 Ryota Ozaki <ozaki.ryota@gmail.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. +# +# Usage: +# +# Start QEMU with: +# +# # qemu [...] -chardev socket,path=/tmp/qga.sock,server,nowait,id=qga0 \ +# -device virtio-serial -device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0 +# +# Run the script: +# +# $ qemu-ga-client --address=/tmp/qga.sock <command> [args...] +# +# or +# +# $ export QGA_CLIENT_ADDRESS=/tmp/qga.sock +# $ qemu-ga-client <command> [args...] +# +# For example: +# +# $ qemu-ga-client cat /etc/resolv.conf +# # Generated by NetworkManager +# nameserver 10.0.2.3 +# $ qemu-ga-client fsfreeze status +# thawed +# $ qemu-ga-client fsfreeze freeze +# 2 filesystems frozen +# +# See also: http://wiki.qemu.org/Features/QAPI/GuestAgent +# + +import base64 +import random + +import qmp + + +class QemuGuestAgent(qmp.QEMUMonitorProtocol): + def __getattr__(self, name): + def wrapper(**kwds): + return self.command('guest-' + name.replace('_', '-'), **kwds) + return wrapper + + +class QemuGuestAgentClient: + error = QemuGuestAgent.error + + def __init__(self, address): + self.qga = QemuGuestAgent(address) + self.qga.connect(negotiate=False) + + def sync(self, timeout=3): + # Avoid being blocked forever + if not self.ping(timeout): + raise EnvironmentError('Agent seems not alive') + uid = random.randint(0, (1 << 32) - 1) + while True: + ret = self.qga.sync(id=uid) + if isinstance(ret, int) and int(ret) == uid: + break + + def __file_read_all(self, handle): + eof = False + data = '' + while not eof: + ret = self.qga.file_read(handle=handle, count=1024) + _data = base64.b64decode(ret['buf-b64']) + data += _data + eof = ret['eof'] + return data + + def read(self, path): + handle = self.qga.file_open(path=path) + try: + data = self.__file_read_all(handle) + finally: + self.qga.file_close(handle=handle) + return data + + def info(self): + info = self.qga.info() + + msgs = [] + msgs.append('version: ' + info['version']) + msgs.append('supported_commands:') + enabled = [c['name'] for c in info['supported_commands'] if c['enabled']] + msgs.append('\tenabled: ' + ', '.join(enabled)) + disabled = [c['name'] for c in info['supported_commands'] if not c['enabled']] + msgs.append('\tdisabled: ' + ', '.join(disabled)) + + return '\n'.join(msgs) + + def __gen_ipv4_netmask(self, prefixlen): + mask = int('1' * prefixlen + '0' * (32 - prefixlen), 2) + return '.'.join([str(mask >> 24), + str((mask >> 16) & 0xff), + str((mask >> 8) & 0xff), + str(mask & 0xff)]) + + def ifconfig(self): + nifs = self.qga.network_get_interfaces() + + msgs = [] + for nif in nifs: + msgs.append(nif['name'] + ':') + if 'ip-addresses' in nif: + for ipaddr in nif['ip-addresses']: + if ipaddr['ip-address-type'] == 'ipv4': + addr = ipaddr['ip-address'] + mask = self.__gen_ipv4_netmask(int(ipaddr['prefix'])) + msgs.append("\tinet %s netmask %s" % (addr, mask)) + elif ipaddr['ip-address-type'] == 'ipv6': + addr = ipaddr['ip-address'] + prefix = ipaddr['prefix'] + msgs.append("\tinet6 %s prefixlen %s" % (addr, prefix)) + if nif['hardware-address'] != '00:00:00:00:00:00': + msgs.append("\tether " + nif['hardware-address']) + + return '\n'.join(msgs) + + def ping(self, timeout): + self.qga.settimeout(timeout) + try: + self.qga.ping() + except self.qga.timeout: + return False + return True + + def fsfreeze(self, cmd): + if cmd not in ['status', 'freeze', 'thaw']: + raise StandardError('Invalid command: ' + cmd) + + return getattr(self.qga, 'fsfreeze' + '_' + cmd)() + + def fstrim(self, minimum=0): + return getattr(self.qga, 'fstrim')(minimum=minimum) + + def suspend(self, mode): + if mode not in ['disk', 'ram', 'hybrid']: + raise StandardError('Invalid mode: ' + mode) + + try: + getattr(self.qga, 'suspend' + '_' + mode)() + # On error exception will raise + except self.qga.timeout: + # On success command will timed out + return + + def shutdown(self, mode='powerdown'): + if mode not in ['powerdown', 'halt', 'reboot']: + raise StandardError('Invalid mode: ' + mode) + + try: + self.qga.shutdown(mode=mode) + except self.qga.timeout: + return + + +def _cmd_cat(client, args): + if len(args) != 1: + print('Invalid argument') + print('Usage: cat <file>') + sys.exit(1) + print(client.read(args[0])) + + +def _cmd_fsfreeze(client, args): + usage = 'Usage: fsfreeze status|freeze|thaw' + if len(args) != 1: + print('Invalid argument') + print(usage) + sys.exit(1) + if args[0] not in ['status', 'freeze', 'thaw']: + print('Invalid command: ' + args[0]) + print(usage) + sys.exit(1) + cmd = args[0] + ret = client.fsfreeze(cmd) + if cmd == 'status': + print(ret) + elif cmd == 'freeze': + print("%d filesystems frozen" % ret) + else: + print("%d filesystems thawed" % ret) + + +def _cmd_fstrim(client, args): + if len(args) == 0: + minimum = 0 + else: + minimum = int(args[0]) + print(client.fstrim(minimum)) + + +def _cmd_ifconfig(client, args): + print(client.ifconfig()) + + +def _cmd_info(client, args): + print(client.info()) + + +def _cmd_ping(client, args): + if len(args) == 0: + timeout = 3 + else: + timeout = float(args[0]) + alive = client.ping(timeout) + if not alive: + print("Not responded in %s sec" % args[0]) + sys.exit(1) + + +def _cmd_suspend(client, args): + usage = 'Usage: suspend disk|ram|hybrid' + if len(args) != 1: + print('Less argument') + print(usage) + sys.exit(1) + if args[0] not in ['disk', 'ram', 'hybrid']: + print('Invalid command: ' + args[0]) + print(usage) + sys.exit(1) + client.suspend(args[0]) + + +def _cmd_shutdown(client, args): + client.shutdown() +_cmd_powerdown = _cmd_shutdown + + +def _cmd_halt(client, args): + client.shutdown('halt') + + +def _cmd_reboot(client, args): + client.shutdown('reboot') + + +commands = [m.replace('_cmd_', '') for m in dir() if '_cmd_' in m] + + +def main(address, cmd, args): + if not os.path.exists(address): + print('%s not found' % address) + sys.exit(1) + + if cmd not in commands: + print('Invalid command: ' + cmd) + print('Available commands: ' + ', '.join(commands)) + sys.exit(1) + + try: + client = QemuGuestAgentClient(address) + except QemuGuestAgent.error, e: + import errno + + print(e) + if e.errno == errno.ECONNREFUSED: + print('Hint: qemu is not running?') + sys.exit(1) + + if cmd != 'ping': + client.sync() + + globals()['_cmd_' + cmd](client, args) + + +if __name__ == '__main__': + import sys + import os + import optparse + + address = os.environ['QGA_CLIENT_ADDRESS'] if 'QGA_CLIENT_ADDRESS' in os.environ else None + + usage = "%prog [--address=<unix_path>|<ipv4_address>] <command> [args...]\n" + usage += '<command>: ' + ', '.join(commands) + parser = optparse.OptionParser(usage=usage) + parser.add_option('--address', action='store', type='string', + default=address, help='Specify a ip:port pair or a unix socket path') + options, args = parser.parse_args() + + address = options.address + if address is None: + parser.error('address is not specified') + sys.exit(1) + + if len(args) == 0: + parser.error('Less argument') + sys.exit(1) + + main(address, args[0], args[1:]) diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt index 287805825f..987c5756b3 100644 --- a/QMP/qmp-events.txt +++ b/QMP/qmp-events.txt @@ -50,7 +50,8 @@ Emitted when a block job has been cancelled. Data: -- "type": Job type ("stream" for image streaming, json-string) +- "type": Job type (json-string; "stream" for image streaming + "commit" for block commit) - "device": Device name (json-string) - "len": Maximum progress value (json-int) - "offset": Current progress value (json-int) @@ -73,7 +74,8 @@ Emitted when a block job has completed. Data: -- "type": Job type ("stream" for image streaming, json-string) +- "type": Job type (json-string; "stream" for image streaming + "commit" for block commit) - "device": Device name (json-string) - "len": Maximum progress value (json-int) - "offset": Current progress value (json-int) @@ -94,6 +96,28 @@ Example: "speed": 0 }, "timestamp": { "seconds": 1267061043, "microseconds": 959568 } } +BLOCK_JOB_ERROR +--------------- + +Emitted when a block job encounters an error. + +Data: + +- "device": device name (json-string) +- "operation": I/O operation (json-string, "read" or "write") +- "action": action that has been taken, it's one of the following (json-string): + "ignore": error has been ignored, the job may fail later + "report": error will be reported and the job canceled + "stop": error caused job to be paused + +Example: + +{ "event": "BLOCK_JOB_ERROR", + "data": { "device": "ide0-hd1", + "operation": "write", + "action": "stop" }, + "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } + DEVICE_TRAY_MOVED ----------------- diff --git a/QMP/qmp.py b/QMP/qmp.py index 36ecc1dfae..33c7d36d9b 100644 --- a/QMP/qmp.py +++ b/QMP/qmp.py @@ -49,7 +49,6 @@ class QEMUMonitorProtocol: return socket.socket(family, socket.SOCK_STREAM) def __negotiate_capabilities(self): - self.__sockfile = self.__sock.makefile() greeting = self.__json_read() if greeting is None or not greeting.has_key('QMP'): raise QMPConnectError @@ -73,7 +72,7 @@ class QEMUMonitorProtocol: error = socket.error - def connect(self): + def connect(self, negotiate=True): """ Connect to the QMP Monitor and perform capabilities negotiation. @@ -83,7 +82,9 @@ class QEMUMonitorProtocol: @raise QMPCapabilitiesError if fails to negotiate capabilities """ self.__sock.connect(self.__address) - return self.__negotiate_capabilities() + self.__sockfile = self.__sock.makefile() + if negotiate: + return self.__negotiate_capabilities() def accept(self): """ @@ -161,3 +162,8 @@ class QEMUMonitorProtocol: def close(self): self.__sock.close() self.__sockfile.close() + + timeout = socket.timeout + + def settimeout(self, timeout): + self.__sock.settimeout(timeout) @@ -119,7 +119,7 @@ bool qemu_aio_wait(void) return true; } - walking_handlers = 1; + walking_handlers++; FD_ZERO(&rdfds); FD_ZERO(&wrfds); @@ -147,7 +147,7 @@ bool qemu_aio_wait(void) } } - walking_handlers = 0; + walking_handlers--; /* No AIO operations? Get us out of here */ if (!busy) { @@ -159,14 +159,14 @@ bool qemu_aio_wait(void) /* if we have any readable fds, dispatch event */ if (ret > 0) { - walking_handlers = 1; - /* we have to walk very carefully in case * qemu_aio_set_fd_handler is called while we're walking */ node = QLIST_FIRST(&aio_handlers); while (node) { AioHandler *tmp; + walking_handlers++; + if (!node->deleted && FD_ISSET(node->fd, &rdfds) && node->io_read) { @@ -181,13 +181,13 @@ bool qemu_aio_wait(void) tmp = node; node = QLIST_NEXT(node, node); - if (tmp->deleted) { + walking_handlers--; + + if (!walking_handlers && tmp->deleted) { QLIST_REMOVE(tmp, node); g_free(tmp); } } - - walking_handlers = 0; } return true; diff --git a/block-migration.c b/block-migration.c index 7def8ab197..ed933017f9 100644 --- a/block-migration.c +++ b/block-migration.c @@ -519,6 +519,8 @@ static void blk_mig_cleanup(void) BlkMigDevState *bmds; BlkMigBlock *blk; + bdrv_drain_all(); + set_dirty_tracking(0); while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) { @@ -26,8 +26,10 @@ #include "trace.h" #include "monitor.h" #include "block_int.h" +#include "blockjob.h" #include "module.h" #include "qjson.h" +#include "sysemu.h" #include "qemu-coroutine.h" #include "qmp-commands.h" #include "qemu-timer.h" @@ -1386,7 +1388,8 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops, } void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - BlockQMPEventAction action, int is_read) + enum MonitorEvent ev, + BlockErrorAction action, bool is_read) { QObject *data; const char *action_str; @@ -1409,7 +1412,7 @@ void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, bdrv->device_name, action_str, is_read ? "read" : "write"); - monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data); + monitor_protocol_event(ev, data); qobject_decref(data); } @@ -1724,6 +1727,149 @@ int bdrv_change_backing_file(BlockDriverState *bs, return ret; } +/* + * Finds the image layer in the chain that has 'bs' as its backing file. + * + * active is the current topmost image. + * + * Returns NULL if bs is not found in active's image chain, + * or if active == bs. + */ +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs) +{ + BlockDriverState *overlay = NULL; + BlockDriverState *intermediate; + + assert(active != NULL); + assert(bs != NULL); + + /* if bs is the same as active, then by definition it has no overlay + */ + if (active == bs) { + return NULL; + } + + intermediate = active; + while (intermediate->backing_hd) { + if (intermediate->backing_hd == bs) { + overlay = intermediate; + break; + } + intermediate = intermediate->backing_hd; + } + + return overlay; +} + +typedef struct BlkIntermediateStates { + BlockDriverState *bs; + QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; +} BlkIntermediateStates; + + +/* + * Drops images above 'base' up to and including 'top', and sets the image + * above 'top' to have base as its backing file. + * + * Requires that the overlay to 'top' is opened r/w, so that the backing file + * information in 'bs' can be properly updated. + * + * E.g., this will convert the following chain: + * bottom <- base <- intermediate <- top <- active + * + * to + * + * bottom <- base <- active + * + * It is allowed for bottom==base, in which case it converts: + * + * base <- intermediate <- top <- active + * + * to + * + * base <- active + * + * Error conditions: + * if active == top, that is considered an error + * + */ +int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, + BlockDriverState *base) +{ + BlockDriverState *intermediate; + BlockDriverState *base_bs = NULL; + BlockDriverState *new_top_bs = NULL; + BlkIntermediateStates *intermediate_state, *next; + int ret = -EIO; + + QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; + QSIMPLEQ_INIT(&states_to_delete); + + if (!top->drv || !base->drv) { + goto exit; + } + + new_top_bs = bdrv_find_overlay(active, top); + + if (new_top_bs == NULL) { + /* we could not find the image above 'top', this is an error */ + goto exit; + } + + /* special case of new_top_bs->backing_hd already pointing to base - nothing + * to do, no intermediate images */ + if (new_top_bs->backing_hd == base) { + ret = 0; + goto exit; + } + + intermediate = top; + + /* now we will go down through the list, and add each BDS we find + * into our deletion queue, until we hit the 'base' + */ + while (intermediate) { + intermediate_state = g_malloc0(sizeof(BlkIntermediateStates)); + intermediate_state->bs = intermediate; + QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); + + if (intermediate->backing_hd == base) { + base_bs = intermediate->backing_hd; + break; + } + intermediate = intermediate->backing_hd; + } + if (base_bs == NULL) { + /* something went wrong, we did not end at the base. safely + * unravel everything, and exit with error */ + goto exit; + } + + /* success - we can delete the intermediate states, and link top->base */ + ret = bdrv_change_backing_file(new_top_bs, base_bs->filename, + base_bs->drv ? base_bs->drv->format_name : ""); + if (ret) { + goto exit; + } + new_top_bs->backing_hd = base_bs; + + + QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { + /* so that bdrv_close() does not recursively close the chain */ + intermediate_state->bs->backing_hd = NULL; + bdrv_delete(intermediate_state->bs); + } + ret = 0; + +exit: + QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { + g_free(intermediate_state); + } + return ret; +} + + static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, size_t size) { @@ -2330,18 +2476,51 @@ void bdrv_set_io_limits(BlockDriverState *bs, bs->io_limits_enabled = bdrv_io_limits_enabled(bs); } -void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error, - BlockErrorAction on_write_error) +void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, + BlockdevOnError on_write_error) { bs->on_read_error = on_read_error; bs->on_write_error = on_write_error; } -BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read) +BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) { return is_read ? bs->on_read_error : bs->on_write_error; } +BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) +{ + BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; + + switch (on_err) { + case BLOCKDEV_ON_ERROR_ENOSPC: + return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_STOP: + return BDRV_ACTION_STOP; + case BLOCKDEV_ON_ERROR_REPORT: + return BDRV_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_IGNORE: + return BDRV_ACTION_IGNORE; + default: + abort(); + } +} + +/* This is done by device models because, while the block layer knows + * about the error, it does not know whether an operation comes from + * the device or the block layer (from a job, for example). + */ +void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, + bool is_read, int error) +{ + assert(error >= 0); + bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read); + if (action == BDRV_ACTION_STOP) { + vm_stop(RUN_STATE_IO_ERROR); + bdrv_iostatus_set_err(bs, error); + } +} + int bdrv_is_read_only(BlockDriverState *bs) { return bs->read_only; @@ -2974,6 +3153,22 @@ int bdrv_get_backing_file_depth(BlockDriverState *bs) return 1 + bdrv_get_backing_file_depth(bs->backing_hd); } +BlockDriverState *bdrv_find_base(BlockDriverState *bs) +{ + BlockDriverState *curr_bs = NULL; + + if (!bs) { + return NULL; + } + + curr_bs = bs; + + while (curr_bs->backing_hd) { + curr_bs = curr_bs->backing_hd; + } + return curr_bs; +} + #define NB_SUFFIXES 4 char *get_human_readable_size(char *buf, int buf_size, int64_t size) @@ -4049,9 +4244,9 @@ void bdrv_iostatus_enable(BlockDriverState *bs) bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) { return (bs->iostatus_enabled && - (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC || - bs->on_write_error == BLOCK_ERR_STOP_ANY || - bs->on_read_error == BLOCK_ERR_STOP_ANY)); + (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || + bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || + bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); } void bdrv_iostatus_disable(BlockDriverState *bs) @@ -4066,14 +4261,10 @@ void bdrv_iostatus_reset(BlockDriverState *bs) } } -/* XXX: Today this is set by device models because it makes the implementation - quite simple. However, the block layer knows about the error, so it's - possible to implement this without device models being involved */ void bdrv_iostatus_set_err(BlockDriverState *bs, int error) { - if (bdrv_iostatus_is_enabled(bs) && - bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { - assert(error >= 0); + assert(bdrv_iostatus_is_enabled(bs)); + if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : BLOCK_DEVICE_IO_STATUS_FAILED; } @@ -4247,130 +4438,3 @@ out: return ret; } - -void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, - int64_t speed, BlockDriverCompletionFunc *cb, - void *opaque, Error **errp) -{ - BlockJob *job; - - if (bs->job || bdrv_in_use(bs)) { - error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); - return NULL; - } - bdrv_set_in_use(bs, 1); - - job = g_malloc0(job_type->instance_size); - job->job_type = job_type; - job->bs = bs; - job->cb = cb; - job->opaque = opaque; - job->busy = true; - bs->job = job; - - /* Only set speed when necessary to avoid NotSupported error */ - if (speed != 0) { - Error *local_err = NULL; - - block_job_set_speed(job, speed, &local_err); - if (error_is_set(&local_err)) { - bs->job = NULL; - g_free(job); - bdrv_set_in_use(bs, 0); - error_propagate(errp, local_err); - return NULL; - } - } - return job; -} - -void block_job_complete(BlockJob *job, int ret) -{ - BlockDriverState *bs = job->bs; - - assert(bs->job == job); - job->cb(job->opaque, ret); - bs->job = NULL; - g_free(job); - bdrv_set_in_use(bs, 0); -} - -void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) -{ - Error *local_err = NULL; - - if (!job->job_type->set_speed) { - error_set(errp, QERR_NOT_SUPPORTED); - return; - } - job->job_type->set_speed(job, speed, &local_err); - if (error_is_set(&local_err)) { - error_propagate(errp, local_err); - return; - } - - job->speed = speed; -} - -void block_job_cancel(BlockJob *job) -{ - job->cancelled = true; - if (job->co && !job->busy) { - qemu_coroutine_enter(job->co, NULL); - } -} - -bool block_job_is_cancelled(BlockJob *job) -{ - return job->cancelled; -} - -struct BlockCancelData { - BlockJob *job; - BlockDriverCompletionFunc *cb; - void *opaque; - bool cancelled; - int ret; -}; - -static void block_job_cancel_cb(void *opaque, int ret) -{ - struct BlockCancelData *data = opaque; - - data->cancelled = block_job_is_cancelled(data->job); - data->ret = ret; - data->cb(data->opaque, ret); -} - -int block_job_cancel_sync(BlockJob *job) -{ - struct BlockCancelData data; - BlockDriverState *bs = job->bs; - - assert(bs->job == job); - - /* Set up our own callback to store the result and chain to - * the original callback. - */ - data.job = job; - data.cb = job->cb; - data.opaque = job->opaque; - data.ret = -EINPROGRESS; - job->cb = block_job_cancel_cb; - job->opaque = &data; - block_job_cancel(job); - while (data.ret == -EINPROGRESS) { - qemu_aio_wait(); - } - return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret; -} - -void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns) -{ - /* Check cancellation *before* setting busy = false, too! */ - if (!block_job_is_cancelled(job)) { - job->busy = false; - co_sleep_ns(clock, ns); - job->busy = true; - } -} @@ -6,9 +6,11 @@ #include "qemu-option.h" #include "qemu-coroutine.h" #include "qobject.h" +#include "qapi-types.h" /* block.c */ typedef struct BlockDriver BlockDriver; +typedef struct BlockJob BlockJob; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ @@ -89,13 +91,8 @@ typedef struct BlockDevOps { #define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) typedef enum { - BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC, - BLOCK_ERR_STOP_ANY -} BlockErrorAction; - -typedef enum { BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP -} BlockQMPEventAction; +} BlockErrorAction; typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; @@ -111,8 +108,6 @@ void bdrv_iostatus_reset(BlockDriverState *bs); void bdrv_iostatus_disable(BlockDriverState *bs); bool bdrv_iostatus_is_enabled(const BlockDriverState *bs); void bdrv_iostatus_set_err(BlockDriverState *bs, int error); -void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - BlockQMPEventAction action, int is_read); void bdrv_info_print(Monitor *mon, const QObject *data); void bdrv_info(Monitor *mon, QObject **ret_data); void bdrv_stats_print(Monitor *mon, const QObject *data); @@ -203,6 +198,11 @@ int bdrv_commit_all(void); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt); void bdrv_register(BlockDriver *bdrv); +int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, + BlockDriverState *base); +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs); +BlockDriverState *bdrv_find_base(BlockDriverState *bs); typedef struct BdrvCheckResult { @@ -277,9 +277,12 @@ int bdrv_has_zero_init(BlockDriverState *bs); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); -void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error, - BlockErrorAction on_write_error); -BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read); +void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); +BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read); +BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error); +void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, + bool is_read, int error); int bdrv_is_read_only(BlockDriverState *bs); int bdrv_is_sg(BlockDriverState *bs); int bdrv_enable_write_cache(BlockDriverState *bs); diff --git a/block/Makefile.objs b/block/Makefile.objs index b5754d39bf..554f429d05 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -3,9 +3,12 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o -block-obj-y += stream.o block-obj-$(CONFIG_WIN32) += raw-win32.o block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o +block-obj-$(CONFIG_GLUSTERFS) += gluster.o + +common-obj-y += stream.o +common-obj-y += commit.o diff --git a/block/blkdebug.c b/block/blkdebug.c index 59dcea0650..1206d5256b 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -28,6 +28,7 @@ typedef struct BDRVBlkdebugState { int state; + int new_state; QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX]; QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; } BDRVBlkdebugState; @@ -403,12 +404,12 @@ static void blkdebug_close(BlockDriverState *bs) } static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, - int old_state, bool injected) + bool injected) { BDRVBlkdebugState *s = bs->opaque; /* Only process rules for the current state */ - if (rule->state && rule->state != old_state) { + if (rule->state && rule->state != s->state) { return injected; } @@ -423,7 +424,7 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, break; case ACTION_SET_STATE: - s->state = rule->options.set_state.new_state; + s->new_state = rule->options.set_state.new_state; break; } return injected; @@ -433,15 +434,16 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event) { BDRVBlkdebugState *s = bs->opaque; struct BlkdebugRule *rule; - int old_state = s->state; bool injected; assert((int)event >= 0 && event < BLKDBG_EVENT_MAX); injected = false; + s->new_state = s->state; QLIST_FOREACH(rule, &s->rules[event], next) { - injected = process_rule(bs, rule, old_state, injected); + injected = process_rule(bs, rule, injected); } + s->state = s->new_state; } static int64_t blkdebug_getlength(BlockDriverState *bs) diff --git a/block/commit.c b/block/commit.c new file mode 100644 index 0000000000..733c91403c --- /dev/null +++ b/block/commit.c @@ -0,0 +1,268 @@ +/* + * Live block commit + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Jeff Cody <jcody@redhat.com> + * Based on stream.c by Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "trace.h" +#include "block_int.h" +#include "blockjob.h" +#include "qemu/ratelimit.h" + +enum { + /* + * Size of data buffer for populating the image file. This should be large + * enough to process multiple clusters in a single call, so that populating + * contiguous regions of the image is efficient. + */ + COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */ +}; + +#define SLICE_TIME 100000000ULL /* ns */ + +typedef struct CommitBlockJob { + BlockJob common; + RateLimit limit; + BlockDriverState *active; + BlockDriverState *top; + BlockDriverState *base; + BlockdevOnError on_error; + int base_flags; + int orig_overlay_flags; +} CommitBlockJob; + +static int coroutine_fn commit_populate(BlockDriverState *bs, + BlockDriverState *base, + int64_t sector_num, int nb_sectors, + void *buf) +{ + int ret = 0; + + ret = bdrv_read(bs, sector_num, buf, nb_sectors); + if (ret) { + return ret; + } + + ret = bdrv_write(base, sector_num, buf, nb_sectors); + if (ret) { + return ret; + } + + return 0; +} + +static void coroutine_fn commit_run(void *opaque) +{ + CommitBlockJob *s = opaque; + BlockDriverState *active = s->active; + BlockDriverState *top = s->top; + BlockDriverState *base = s->base; + BlockDriverState *overlay_bs = NULL; + int64_t sector_num, end; + int ret = 0; + int n = 0; + void *buf; + int bytes_written = 0; + int64_t base_len; + + ret = s->common.len = bdrv_getlength(top); + + + if (s->common.len < 0) { + goto exit_restore_reopen; + } + + ret = base_len = bdrv_getlength(base); + if (base_len < 0) { + goto exit_restore_reopen; + } + + if (base_len < s->common.len) { + ret = bdrv_truncate(base, s->common.len); + if (ret) { + goto exit_restore_reopen; + } + } + + overlay_bs = bdrv_find_overlay(active, top); + + end = s->common.len >> BDRV_SECTOR_BITS; + buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE); + + for (sector_num = 0; sector_num < end; sector_num += n) { + uint64_t delay_ns = 0; + bool copy; + +wait: + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that qemu_aio_flush() returns. + */ + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + if (block_job_is_cancelled(&s->common)) { + break; + } + /* Copy if allocated above the base */ + ret = bdrv_co_is_allocated_above(top, base, sector_num, + COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, + &n); + copy = (ret == 1); + trace_commit_one_iteration(s, sector_num, n, ret); + if (copy) { + if (s->common.speed) { + delay_ns = ratelimit_calculate_delay(&s->limit, n); + if (delay_ns > 0) { + goto wait; + } + } + ret = commit_populate(top, base, sector_num, n, buf); + bytes_written += n * BDRV_SECTOR_SIZE; + } + if (ret < 0) { + if (s->on_error == BLOCKDEV_ON_ERROR_STOP || + s->on_error == BLOCKDEV_ON_ERROR_REPORT|| + (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) { + goto exit_free_buf; + } else { + n = 0; + continue; + } + } + /* Publish progress */ + s->common.offset += n * BDRV_SECTOR_SIZE; + } + + ret = 0; + + if (!block_job_is_cancelled(&s->common) && sector_num == end) { + /* success */ + ret = bdrv_drop_intermediate(active, top, base); + } + +exit_free_buf: + qemu_vfree(buf); + +exit_restore_reopen: + /* restore base open flags here if appropriate (e.g., change the base back + * to r/o). These reopens do not need to be atomic, since we won't abort + * even on failure here */ + if (s->base_flags != bdrv_get_flags(base)) { + bdrv_reopen(base, s->base_flags, NULL); + } + if (s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { + bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); + } + + block_job_complete(&s->common, ret); +} + +static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + CommitBlockJob *s = container_of(job, CommitBlockJob, common); + + if (speed < 0) { + error_set(errp, QERR_INVALID_PARAMETER, "speed"); + return; + } + ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); +} + +static BlockJobType commit_job_type = { + .instance_size = sizeof(CommitBlockJob), + .job_type = "commit", + .set_speed = commit_set_speed, +}; + +void commit_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverState *top, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + CommitBlockJob *s; + BlockReopenQueue *reopen_queue = NULL; + int orig_overlay_flags; + int orig_base_flags; + BlockDriverState *overlay_bs; + Error *local_err = NULL; + + if ((on_error == BLOCKDEV_ON_ERROR_STOP || + on_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER_COMBINATION); + return; + } + + /* Once we support top == active layer, remove this check */ + if (top == bs) { + error_setg(errp, + "Top image as the active layer is currently unsupported"); + return; + } + + if (top == base) { + error_setg(errp, "Invalid files for merge: top and base are the same"); + return; + } + + /* top and base may be valid, but let's make sure that base is reachable + * from top */ + if (bdrv_find_backing_image(top, base->filename) != base) { + error_setg(errp, + "Base (%s) is not reachable from top (%s)", + base->filename, top->filename); + return; + } + + overlay_bs = bdrv_find_overlay(bs, top); + + if (overlay_bs == NULL) { + error_setg(errp, "Could not find overlay image for %s:", top->filename); + return; + } + + orig_base_flags = bdrv_get_flags(base); + orig_overlay_flags = bdrv_get_flags(overlay_bs); + + /* convert base & overlay_bs to r/w, if necessary */ + if (!(orig_base_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, base, + orig_base_flags | BDRV_O_RDWR); + } + if (!(orig_overlay_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, + orig_overlay_flags | BDRV_O_RDWR); + } + if (reopen_queue) { + bdrv_reopen_multiple(reopen_queue, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + } + + + s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp); + if (!s) { + return; + } + + s->base = base; + s->top = top; + s->active = bs; + + s->base_flags = orig_base_flags; + s->orig_overlay_flags = orig_overlay_flags; + + s->on_error = on_error; + s->common.co = qemu_coroutine_create(commit_run); + + trace_commit_start(bs, base, top, s, s->common.co, opaque); + qemu_coroutine_enter(s->common.co, s); +} diff --git a/block/gluster.c b/block/gluster.c new file mode 100644 index 0000000000..3588d7377f --- /dev/null +++ b/block/gluster.c @@ -0,0 +1,624 @@ +/* + * GlusterFS backend for QEMU + * + * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com> + * + * Pipe handling mechanism in AIO implementation is derived from + * block/rbd.c. Hence, + * + * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>, + * Josh Durgin <josh.durgin@dreamhost.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ +#include <glusterfs/api/glfs.h> +#include "block_int.h" +#include "qemu_socket.h" +#include "uri.h" + +typedef struct GlusterAIOCB { + BlockDriverAIOCB common; + int64_t size; + int ret; + bool *finished; + QEMUBH *bh; +} GlusterAIOCB; + +typedef struct BDRVGlusterState { + struct glfs *glfs; + int fds[2]; + struct glfs_fd *fd; + int qemu_aio_count; + int event_reader_pos; + GlusterAIOCB *event_acb; +} BDRVGlusterState; + +#define GLUSTER_FD_READ 0 +#define GLUSTER_FD_WRITE 1 + +typedef struct GlusterConf { + char *server; + int port; + char *volname; + char *image; + char *transport; +} GlusterConf; + +static void qemu_gluster_gconf_free(GlusterConf *gconf) +{ + g_free(gconf->server); + g_free(gconf->volname); + g_free(gconf->image); + g_free(gconf->transport); + g_free(gconf); +} + +static int parse_volume_options(GlusterConf *gconf, char *path) +{ + char *p, *q; + + if (!path) { + return -EINVAL; + } + + /* volume */ + p = q = path + strspn(path, "/"); + p += strcspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->volname = g_strndup(q, p - q); + + /* image */ + p += strspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->image = g_strdup(p); + return 0; +} + +/* + * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...] + * + * 'gluster' is the protocol. + * + * 'transport' specifies the transport type used to connect to gluster + * management daemon (glusterd). Valid transport types are + * tcp, unix and rdma. If a transport type isn't specified, then tcp + * type is assumed. + * + * 'server' specifies the server where the volume file specification for + * the given volume resides. This can be either hostname, ipv4 address + * or ipv6 address. ipv6 address needs to be within square brackets [ ]. + * If transport type is 'unix', then 'server' field should not be specifed. + * The 'socket' field needs to be populated with the path to unix domain + * socket. + * + * 'port' is the port number on which glusterd is listening. This is optional + * and if not specified, QEMU will send 0 which will make gluster to use the + * default port. If the transport type is unix, then 'port' should not be + * specified. + * + * 'volname' is the name of the gluster volume which contains the VM image. + * + * 'image' is the path to the actual VM image that resides on gluster volume. + * + * Examples: + * + * file=gluster://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img + * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img + * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img + * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img + * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket + * file=gluster+rdma://1.2.3.4:24007/testvol/a.img + */ +static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) +{ + URI *uri; + QueryParams *qp = NULL; + bool is_unix = false; + int ret = 0; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + /* transport */ + if (!strcmp(uri->scheme, "gluster")) { + gconf->transport = g_strdup("tcp"); + } else if (!strcmp(uri->scheme, "gluster+tcp")) { + gconf->transport = g_strdup("tcp"); + } else if (!strcmp(uri->scheme, "gluster+unix")) { + gconf->transport = g_strdup("unix"); + is_unix = true; + } else if (!strcmp(uri->scheme, "gluster+rdma")) { + gconf->transport = g_strdup("rdma"); + } else { + ret = -EINVAL; + goto out; + } + + ret = parse_volume_options(gconf, uri->path); + if (ret < 0) { + goto out; + } + + qp = query_params_parse(uri->query); + if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) { + ret = -EINVAL; + goto out; + } + + if (is_unix) { + if (uri->server || uri->port) { + ret = -EINVAL; + goto out; + } + if (strcmp(qp->p[0].name, "socket")) { + ret = -EINVAL; + goto out; + } + gconf->server = g_strdup(qp->p[0].value); + } else { + gconf->server = g_strdup(uri->server); + gconf->port = uri->port; + } + +out: + if (qp) { + query_params_free(qp); + } + uri_free(uri); + return ret; +} + +static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename) +{ + struct glfs *glfs = NULL; + int ret; + int old_errno; + + ret = qemu_gluster_parseuri(gconf, filename); + if (ret < 0) { + error_report("Usage: file=gluster[+transport]://[server[:port]]/" + "volname/image[?socket=...]"); + errno = -ret; + goto out; + } + + glfs = glfs_new(gconf->volname); + if (!glfs) { + goto out; + } + + ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server, + gconf->port); + if (ret < 0) { + goto out; + } + + /* + * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when + * GlusterFS makes GF_LOG_* macros available to libgfapi users. + */ + ret = glfs_set_logging(glfs, "-", 4); + if (ret < 0) { + goto out; + } + + ret = glfs_init(glfs); + if (ret) { + error_report("Gluster connection failed for server=%s port=%d " + "volume=%s image=%s transport=%s\n", gconf->server, gconf->port, + gconf->volname, gconf->image, gconf->transport); + goto out; + } + return glfs; + +out: + if (glfs) { + old_errno = errno; + glfs_fini(glfs); + errno = old_errno; + } + return NULL; +} + +static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s) +{ + int ret; + bool *finished = acb->finished; + BlockDriverCompletionFunc *cb = acb->common.cb; + void *opaque = acb->common.opaque; + + if (!acb->ret || acb->ret == acb->size) { + ret = 0; /* Success */ + } else if (acb->ret < 0) { + ret = acb->ret; /* Read/Write failed */ + } else { + ret = -EIO; /* Partial read/write - fail it */ + } + + s->qemu_aio_count--; + qemu_aio_release(acb); + cb(opaque, ret); + if (finished) { + *finished = true; + } +} + +static void qemu_gluster_aio_event_reader(void *opaque) +{ + BDRVGlusterState *s = opaque; + ssize_t ret; + + do { + char *p = (char *)&s->event_acb; + + ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos, + sizeof(s->event_acb) - s->event_reader_pos); + if (ret > 0) { + s->event_reader_pos += ret; + if (s->event_reader_pos == sizeof(s->event_acb)) { + s->event_reader_pos = 0; + qemu_gluster_complete_aio(s->event_acb, s); + } + } + } while (ret < 0 && errno == EINTR); +} + +static int qemu_gluster_aio_flush_cb(void *opaque) +{ + BDRVGlusterState *s = opaque; + + return (s->qemu_aio_count > 0); +} + +static int qemu_gluster_open(BlockDriverState *bs, const char *filename, + int bdrv_flags) +{ + BDRVGlusterState *s = bs->opaque; + int open_flags = O_BINARY; + int ret = 0; + GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); + + s->glfs = qemu_gluster_init(gconf, filename); + if (!s->glfs) { + ret = -errno; + goto out; + } + + if (bdrv_flags & BDRV_O_RDWR) { + open_flags |= O_RDWR; + } else { + open_flags |= O_RDONLY; + } + + if ((bdrv_flags & BDRV_O_NOCACHE)) { + open_flags |= O_DIRECT; + } + + s->fd = glfs_open(s->glfs, gconf->image, open_flags); + if (!s->fd) { + ret = -errno; + goto out; + } + + ret = qemu_pipe(s->fds); + if (ret < 0) { + ret = -errno; + goto out; + } + fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], + qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s); + +out: + qemu_gluster_gconf_free(gconf); + if (!ret) { + return ret; + } + if (s->fd) { + glfs_close(s->fd); + } + if (s->glfs) { + glfs_fini(s->glfs); + } + return ret; +} + +static int qemu_gluster_create(const char *filename, + QEMUOptionParameter *options) +{ + struct glfs *glfs; + struct glfs_fd *fd; + int ret = 0; + int64_t total_size = 0; + GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); + + glfs = qemu_gluster_init(gconf, filename); + if (!glfs) { + ret = -errno; + goto out; + } + + while (options && options->name) { + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { + total_size = options->value.n / BDRV_SECTOR_SIZE; + } + options++; + } + + fd = glfs_creat(glfs, gconf->image, + O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); + if (!fd) { + ret = -errno; + } else { + if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { + ret = -errno; + } + if (glfs_close(fd) != 0) { + ret = -errno; + } + } +out: + qemu_gluster_gconf_free(gconf); + if (glfs) { + glfs_fini(glfs); + } + return ret; +} + +static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)blockacb; + bool finished = false; + + acb->finished = &finished; + while (!finished) { + qemu_aio_wait(); + } +} + +static AIOPool gluster_aio_pool = { + .aiocb_size = sizeof(GlusterAIOCB), + .cancel = qemu_gluster_aio_cancel, +}; + +static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)arg; + BlockDriverState *bs = acb->common.bs; + BDRVGlusterState *s = bs->opaque; + int retval; + + acb->ret = ret; + retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb)); + if (retval != sizeof(acb)) { + /* + * Gluster AIO callback thread failed to notify the waiting + * QEMU thread about IO completion. + * + * Complete this IO request and make the disk inaccessible for + * subsequent reads and writes. + */ + error_report("Gluster failed to notify QEMU about IO completion"); + + qemu_mutex_lock_iothread(); /* We are in gluster thread context */ + acb->common.cb(acb->common.opaque, -EIO); + qemu_aio_release(acb); + s->qemu_aio_count--; + close(s->fds[GLUSTER_FD_READ]); + close(s->fds[GLUSTER_FD_WRITE]); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, + NULL); + bs->drv = NULL; /* Make the disk inaccessible */ + qemu_mutex_unlock_iothread(); + } +} + +static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int write) +{ + int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + size_t size; + off_t offset; + + offset = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * BDRV_SECTOR_SIZE; + s->qemu_aio_count++; + + acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque); + acb->size = size; + acb->ret = 0; + acb->finished = NULL; + + if (write) { + ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, acb); + } else { + ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, acb); + } + + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); +} + +static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); +} + +static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + + acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque); + acb->size = 0; + acb->ret = 0; + acb->finished = NULL; + s->qemu_aio_count++; + + ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static int64_t qemu_gluster_getlength(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + int64_t ret; + + ret = glfs_lseek(s->fd, 0, SEEK_END); + if (ret < 0) { + return -errno; + } else { + return ret; + } +} + +static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + struct stat st; + int ret; + + ret = glfs_fstat(s->fd, &st); + if (ret < 0) { + return -errno; + } else { + return st.st_blocks * 512; + } +} + +static void qemu_gluster_close(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + + close(s->fds[GLUSTER_FD_READ]); + close(s->fds[GLUSTER_FD_WRITE]); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL); + + if (s->fd) { + glfs_close(s->fd); + s->fd = NULL; + } + glfs_fini(s->glfs); +} + +static QEMUOptionParameter qemu_gluster_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size" + }, + { NULL } +}; + +static BlockDriver bdrv_gluster = { + .format_name = "gluster", + .protocol_name = "gluster", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_tcp = { + .format_name = "gluster", + .protocol_name = "gluster+tcp", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_unix = { + .format_name = "gluster", + .protocol_name = "gluster+unix", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_rdma = { + .format_name = "gluster", + .protocol_name = "gluster+rdma", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static void bdrv_gluster_init(void) +{ + bdrv_register(&bdrv_gluster_rdma); + bdrv_register(&bdrv_gluster_unix); + bdrv_register(&bdrv_gluster_tcp); + bdrv_register(&bdrv_gluster); +} + +block_init(bdrv_gluster_init); diff --git a/block/stream.c b/block/stream.c index c4f87dd5b6..792665276e 100644 --- a/block/stream.c +++ b/block/stream.c @@ -13,6 +13,7 @@ #include "trace.h" #include "block_int.h" +#include "blockjob.h" #include "qemu/ratelimit.h" enum { @@ -30,6 +31,7 @@ typedef struct StreamBlockJob { BlockJob common; RateLimit limit; BlockDriverState *base; + BlockdevOnError on_error; char backing_file_id[1024]; } StreamBlockJob; @@ -77,6 +79,7 @@ static void coroutine_fn stream_run(void *opaque) BlockDriverState *bs = s->common.bs; BlockDriverState *base = s->base; int64_t sector_num, end; + int error = 0; int ret = 0; int n = 0; void *buf; @@ -141,7 +144,19 @@ wait: ret = stream_populate(bs, sector_num, n, buf); } if (ret < 0) { - break; + BlockErrorAction action = + block_job_error_action(&s->common, s->common.bs, s->on_error, + true, -ret); + if (action == BDRV_ACTION_STOP) { + n = 0; + continue; + } + if (error == 0) { + error = ret; + } + if (action == BDRV_ACTION_REPORT) { + break; + } } ret = 0; @@ -153,6 +168,9 @@ wait: bdrv_disable_copy_on_read(bs); } + /* Do not remove the backing file if an error was there but ignored. */ + ret = error; + if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) { const char *base_id = NULL, *base_fmt = NULL; if (base) { @@ -188,11 +206,19 @@ static BlockJobType stream_job_type = { void stream_start(BlockDriverState *bs, BlockDriverState *base, const char *base_id, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { StreamBlockJob *s; + if ((on_error == BLOCKDEV_ON_ERROR_STOP || + on_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER, "on-error"); + return; + } + s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp); if (!s) { return; @@ -203,6 +229,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base, pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id); } + s->on_error = on_error; s->common.co = qemu_coroutine_create(stream_run); trace_stream_start(bs, base, s, s->common.co, opaque); qemu_coroutine_enter(s->common.co, s); diff --git a/block_int.h b/block_int.h index ac4245cb18..f4bae04401 100644 --- a/block_int.h +++ b/block_int.h @@ -31,6 +31,7 @@ #include "qemu-timer.h" #include "qapi-types.h" #include "qerror.h" +#include "monitor.h" #define BLOCK_FLAG_ENCRYPT 1 #define BLOCK_FLAG_COMPAT6 4 @@ -67,73 +68,6 @@ typedef struct BlockIOBaseValue { uint64_t ios[2]; } BlockIOBaseValue; -typedef struct BlockJob BlockJob; - -/** - * BlockJobType: - * - * A class type for block job objects. - */ -typedef struct BlockJobType { - /** Derived BlockJob struct size */ - size_t instance_size; - - /** String describing the operation, part of query-block-jobs QMP API */ - const char *job_type; - - /** Optional callback for job types that support setting a speed limit */ - void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); -} BlockJobType; - -/** - * BlockJob: - * - * Long-running operation on a BlockDriverState. - */ -struct BlockJob { - /** The job type, including the job vtable. */ - const BlockJobType *job_type; - - /** The block device on which the job is operating. */ - BlockDriverState *bs; - - /** - * The coroutine that executes the job. If not NULL, it is - * reentered when busy is false and the job is cancelled. - */ - Coroutine *co; - - /** - * Set to true if the job should cancel itself. The flag must - * always be tested just before toggling the busy flag from false - * to true. After a job has been cancelled, it should only yield - * if #qemu_aio_wait will ("sooner or later") reenter the coroutine. - */ - bool cancelled; - - /** - * Set to false by the job while it is in a quiescent state, where - * no I/O is pending and the job has yielded on any condition - * that is not detected by #qemu_aio_wait, such as a timer. - */ - bool busy; - - /** Offset that is published by the query-block-jobs QMP API */ - int64_t offset; - - /** Length that is published by the query-block-jobs QMP API */ - int64_t len; - - /** Speed that was set with @block_job_set_speed. */ - int64_t speed; - - /** The completion function that will be called when the job completes. */ - BlockDriverCompletionFunc *cb; - - /** The opaque value that is passed to the completion function. */ - void *opaque; -}; - struct BlockDriver { const char *format_name; int instance_size; @@ -329,7 +263,7 @@ struct BlockDriverState { /* NOTE: the following infos are only hints for real hardware drivers. They are not used by the block driver */ - BlockErrorAction on_read_error, on_write_error; + BlockdevOnError on_read_error, on_write_error; bool iostatus_enabled; BlockDeviceIoStatus iostatus; char device_name[32]; @@ -353,92 +287,9 @@ void bdrv_set_io_limits(BlockDriverState *bs, #ifdef _WIN32 int is_windows_drive(const char *filename); #endif - -/** - * block_job_create: - * @job_type: The class object for the newly-created job. - * @bs: The block - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @cb: Completion function for the job. - * @opaque: Opaque pointer value passed to @cb. - * @errp: Error object. - * - * Create a new long-running block device job and return it. The job - * will call @cb asynchronously when the job completes. Note that - * @bs may have been closed at the time the @cb it is called. If - * this is the case, the job may be reported as either cancelled or - * completed. - * - * This function is not part of the public job interface; it should be - * called from a wrapper that is specific to the job type. - */ -void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, - int64_t speed, BlockDriverCompletionFunc *cb, - void *opaque, Error **errp); - -/** - * block_job_sleep_ns: - * @job: The job that calls the function. - * @clock: The clock to sleep on. - * @ns: How many nanoseconds to stop for. - * - * Put the job to sleep (assuming that it wasn't canceled) for @ns - * nanoseconds. Canceling the job will interrupt the wait immediately. - */ -void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns); - -/** - * block_job_complete: - * @job: The job being completed. - * @ret: The status code. - * - * Call the completion function that was registered at creation time, and - * free @job. - */ -void block_job_complete(BlockJob *job, int ret); - -/** - * block_job_set_speed: - * @job: The job to set the speed for. - * @speed: The new value - * @errp: Error object. - * - * Set a rate-limiting parameter for the job; the actual meaning may - * vary depending on the job type. - */ -void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); - -/** - * block_job_cancel: - * @job: The job to be canceled. - * - * Asynchronously cancel the specified job. - */ -void block_job_cancel(BlockJob *job); - -/** - * block_job_is_cancelled: - * @job: The job being queried. - * - * Returns whether the job is scheduled for cancellation. - */ -bool block_job_is_cancelled(BlockJob *job); - -/** - * block_job_cancel: - * @job: The job to be canceled. - * - * Asynchronously cancel the job and wait for it to reach a quiescent - * state. Note that the completion callback will still be called - * asynchronously, hence it is *not* valid to call #bdrv_delete - * immediately after #block_job_cancel_sync. Users of block jobs - * will usually protect the BlockDriverState objects with a reference - * count, should this be a concern. - * - * Returns the return value from the job if the job actually completed - * during the call, or -ECANCELED if it was canceled. - */ -int block_job_cancel_sync(BlockJob *job); +void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, + enum MonitorEvent ev, + BlockErrorAction action, bool is_read); /** * stream_start: @@ -448,6 +299,7 @@ int block_job_cancel_sync(BlockJob *job); * @base_id: The file name that will be written to @bs as the new * backing file if the job completes. Ignored if @base is %NULL. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. @@ -459,8 +311,24 @@ int block_job_cancel_sync(BlockJob *job); * @base_id in the written image and to @base in the live BlockDriverState. */ void stream_start(BlockDriverState *bs, BlockDriverState *base, - const char *base_id, int64_t speed, + const char *base_id, int64_t speed, BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); +/** + * commit_start: + * @bs: Top Block device + * @base: Block device that will be written into, and become the new top + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + */ +void commit_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverState *top, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + #endif /* BLOCK_INT_H */ diff --git a/blockdev.c b/blockdev.c index e5d450f0bb..5f18dfa97b 100644 --- a/blockdev.c +++ b/blockdev.c @@ -9,6 +9,7 @@ #include "blockdev.h" #include "hw/block-common.h" +#include "blockjob.h" #include "monitor.h" #include "qerror.h" #include "qemu-option.h" @@ -237,16 +238,16 @@ static void drive_put_ref_bh_schedule(DriveInfo *dinfo) qemu_bh_schedule(s->bh); } -static int parse_block_error_action(const char *buf, int is_read) +static int parse_block_error_action(const char *buf, bool is_read) { if (!strcmp(buf, "ignore")) { - return BLOCK_ERR_IGNORE; + return BLOCKDEV_ON_ERROR_IGNORE; } else if (!is_read && !strcmp(buf, "enospc")) { - return BLOCK_ERR_STOP_ENOSPC; + return BLOCKDEV_ON_ERROR_ENOSPC; } else if (!strcmp(buf, "stop")) { - return BLOCK_ERR_STOP_ANY; + return BLOCKDEV_ON_ERROR_STOP; } else if (!strcmp(buf, "report")) { - return BLOCK_ERR_REPORT; + return BLOCKDEV_ON_ERROR_REPORT; } else { error_report("'%s' invalid %s error action", buf, is_read ? "read" : "write"); @@ -432,7 +433,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) return NULL; } - on_write_error = BLOCK_ERR_STOP_ENOSPC; + on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; if ((buf = qemu_opt_get(opts, "werror")) != NULL) { if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) { error_report("werror is not supported by this bus type"); @@ -445,7 +446,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) } } - on_read_error = BLOCK_ERR_REPORT; + on_read_error = BLOCKDEV_ON_ERROR_REPORT; if ((buf = qemu_opt_get(opts, "rerror")) != NULL) { if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) { error_report("rerror is not supported by this bus type"); @@ -805,6 +806,11 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { /* This removes our old bs from the bdrv_states, and adds the new bs */ bdrv_append(states->new_bs, states->old_bs); + /* We don't need (or want) to use the transactional + * bdrv_reopen_multiple() across all the entries at once, because we + * don't want to abort all of them if one of them fails the reopen */ + bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR, + NULL); } /* success */ @@ -1065,12 +1071,12 @@ static QObject *qobject_from_block_job(BlockJob *job) job->speed); } -static void block_stream_cb(void *opaque, int ret) +static void block_job_cb(void *opaque, int ret) { BlockDriverState *bs = opaque; QObject *obj; - trace_block_stream_cb(bs, bs->job, ret); + trace_block_job_cb(bs, bs->job, ret); assert(bs->job); obj = qobject_from_block_job(bs->job); @@ -1090,13 +1096,18 @@ static void block_stream_cb(void *opaque, int ret) } void qmp_block_stream(const char *device, bool has_base, - const char *base, bool has_speed, - int64_t speed, Error **errp) + const char *base, bool has_speed, int64_t speed, + bool has_on_error, BlockdevOnError on_error, + Error **errp) { BlockDriverState *bs; BlockDriverState *base_bs = NULL; Error *local_err = NULL; + if (!has_on_error) { + on_error = BLOCKDEV_ON_ERROR_REPORT; + } + bs = bdrv_find(device); if (!bs) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); @@ -1112,7 +1123,7 @@ void qmp_block_stream(const char *device, bool has_base, } stream_start(bs, base_bs, base, has_speed ? speed : 0, - block_stream_cb, bs, &local_err); + on_error, block_job_cb, bs, &local_err); if (error_is_set(&local_err)) { error_propagate(errp, local_err); return; @@ -1126,6 +1137,64 @@ void qmp_block_stream(const char *device, bool has_base, trace_qmp_block_stream(bs, bs->job); } +void qmp_block_commit(const char *device, + bool has_base, const char *base, const char *top, + bool has_speed, int64_t speed, + Error **errp) +{ + BlockDriverState *bs; + BlockDriverState *base_bs, *top_bs; + Error *local_err = NULL; + /* This will be part of the QMP command, if/when the + * BlockdevOnError change for blkmirror makes it in + */ + BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT; + + /* drain all i/o before commits */ + bdrv_drain_all(); + + bs = bdrv_find(device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + if (base && has_base) { + base_bs = bdrv_find_backing_image(bs, base); + } else { + base_bs = bdrv_find_base(bs); + } + + if (base_bs == NULL) { + error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL"); + return; + } + + /* default top_bs is the active layer */ + top_bs = bs; + + if (top) { + if (strcmp(bs->filename, top) != 0) { + top_bs = bdrv_find_backing_image(bs, top); + } + } + + if (top_bs == NULL) { + error_setg(errp, "Top image file %s not found", top ? top : "NULL"); + return; + } + + commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs, + &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + /* Grab a reference so hotplug does not delete the BlockDriverState from + * underneath us. + */ + drive_get_ref(drive_get_by_blockdev(bs)); +} + static BlockJob *find_block_job(const char *device) { BlockDriverState *bs; @@ -1142,19 +1211,28 @@ void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp) BlockJob *job = find_block_job(device); if (!job) { - error_set(errp, QERR_DEVICE_NOT_ACTIVE, device); + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } block_job_set_speed(job, speed, errp); } -void qmp_block_job_cancel(const char *device, Error **errp) +void qmp_block_job_cancel(const char *device, + bool has_force, bool force, Error **errp) { BlockJob *job = find_block_job(device); + if (!has_force) { + force = false; + } + if (!job) { - error_set(errp, QERR_DEVICE_NOT_ACTIVE, device); + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); + return; + } + if (job->paused && !force) { + error_set(errp, QERR_BLOCK_JOB_PAUSED, device); return; } @@ -1162,25 +1240,40 @@ void qmp_block_job_cancel(const char *device, Error **errp) block_job_cancel(job); } +void qmp_block_job_pause(const char *device, Error **errp) +{ + BlockJob *job = find_block_job(device); + + if (!job) { + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); + return; + } + + trace_qmp_block_job_pause(job); + block_job_pause(job); +} + +void qmp_block_job_resume(const char *device, Error **errp) +{ + BlockJob *job = find_block_job(device); + + if (!job) { + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); + return; + } + + trace_qmp_block_job_resume(job); + block_job_resume(job); +} + static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs) { BlockJobInfoList **prev = opaque; BlockJob *job = bs->job; if (job) { - BlockJobInfoList *elem; - BlockJobInfo *info = g_new(BlockJobInfo, 1); - *info = (BlockJobInfo){ - .type = g_strdup(job->job_type->job_type), - .device = g_strdup(bdrv_get_device_name(bs)), - .len = job->len, - .offset = job->offset, - .speed = job->speed, - }; - - elem = g_new0(BlockJobInfoList, 1); - elem->value = info; - + BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1); + elem->value = block_job_query(bs->job); (*prev)->next = elem; *prev = elem; } diff --git a/blockjob.c b/blockjob.c new file mode 100644 index 0000000000..f55f55a193 --- /dev/null +++ b/blockjob.c @@ -0,0 +1,249 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "config-host.h" +#include "qemu-common.h" +#include "trace.h" +#include "monitor.h" +#include "block.h" +#include "blockjob.h" +#include "block_int.h" +#include "qjson.h" +#include "qemu-coroutine.h" +#include "qmp-commands.h" +#include "qemu-timer.h" + +void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, + int64_t speed, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + BlockJob *job; + + if (bs->job || bdrv_in_use(bs)) { + error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); + return NULL; + } + bdrv_set_in_use(bs, 1); + + job = g_malloc0(job_type->instance_size); + job->job_type = job_type; + job->bs = bs; + job->cb = cb; + job->opaque = opaque; + job->busy = true; + bs->job = job; + + /* Only set speed when necessary to avoid NotSupported error */ + if (speed != 0) { + Error *local_err = NULL; + + block_job_set_speed(job, speed, &local_err); + if (error_is_set(&local_err)) { + bs->job = NULL; + g_free(job); + bdrv_set_in_use(bs, 0); + error_propagate(errp, local_err); + return NULL; + } + } + return job; +} + +void block_job_complete(BlockJob *job, int ret) +{ + BlockDriverState *bs = job->bs; + + assert(bs->job == job); + job->cb(job->opaque, ret); + bs->job = NULL; + g_free(job); + bdrv_set_in_use(bs, 0); +} + +void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + Error *local_err = NULL; + + if (!job->job_type->set_speed) { + error_set(errp, QERR_NOT_SUPPORTED); + return; + } + job->job_type->set_speed(job, speed, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + return; + } + + job->speed = speed; +} + +void block_job_pause(BlockJob *job) +{ + job->paused = true; +} + +bool block_job_is_paused(BlockJob *job) +{ + return job->paused; +} + +void block_job_resume(BlockJob *job) +{ + job->paused = false; + block_job_iostatus_reset(job); + if (job->co && !job->busy) { + qemu_coroutine_enter(job->co, NULL); + } +} + +void block_job_cancel(BlockJob *job) +{ + job->cancelled = true; + block_job_resume(job); +} + +bool block_job_is_cancelled(BlockJob *job) +{ + return job->cancelled; +} + +void block_job_iostatus_reset(BlockJob *job) +{ + job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; +} + +struct BlockCancelData { + BlockJob *job; + BlockDriverCompletionFunc *cb; + void *opaque; + bool cancelled; + int ret; +}; + +static void block_job_cancel_cb(void *opaque, int ret) +{ + struct BlockCancelData *data = opaque; + + data->cancelled = block_job_is_cancelled(data->job); + data->ret = ret; + data->cb(data->opaque, ret); +} + +int block_job_cancel_sync(BlockJob *job) +{ + struct BlockCancelData data; + BlockDriverState *bs = job->bs; + + assert(bs->job == job); + + /* Set up our own callback to store the result and chain to + * the original callback. + */ + data.job = job; + data.cb = job->cb; + data.opaque = job->opaque; + data.ret = -EINPROGRESS; + job->cb = block_job_cancel_cb; + job->opaque = &data; + block_job_cancel(job); + while (data.ret == -EINPROGRESS) { + qemu_aio_wait(); + } + return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret; +} + +void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns) +{ + assert(job->busy); + + /* Check cancellation *before* setting busy = false, too! */ + if (block_job_is_cancelled(job)) { + return; + } + + job->busy = false; + if (block_job_is_paused(job)) { + qemu_coroutine_yield(); + } else { + co_sleep_ns(clock, ns); + } + job->busy = true; +} + +BlockJobInfo *block_job_query(BlockJob *job) +{ + BlockJobInfo *info = g_new0(BlockJobInfo, 1); + info->type = g_strdup(job->job_type->job_type); + info->device = g_strdup(bdrv_get_device_name(job->bs)); + info->len = job->len; + info->busy = job->busy; + info->paused = job->paused; + info->offset = job->offset; + info->speed = job->speed; + info->io_status = job->iostatus; + return info; +} + +static void block_job_iostatus_set_err(BlockJob *job, int error) +{ + if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : + BLOCK_DEVICE_IO_STATUS_FAILED; + } +} + + +BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, + BlockdevOnError on_err, + int is_read, int error) +{ + BlockErrorAction action; + + switch (on_err) { + case BLOCKDEV_ON_ERROR_ENOSPC: + action = (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT; + break; + case BLOCKDEV_ON_ERROR_STOP: + action = BDRV_ACTION_STOP; + break; + case BLOCKDEV_ON_ERROR_REPORT: + action = BDRV_ACTION_REPORT; + break; + case BLOCKDEV_ON_ERROR_IGNORE: + action = BDRV_ACTION_IGNORE; + break; + default: + abort(); + } + bdrv_emit_qmp_error_event(job->bs, QEVENT_BLOCK_JOB_ERROR, action, is_read); + if (action == BDRV_ACTION_STOP) { + block_job_pause(job); + block_job_iostatus_set_err(job, error); + if (bs != job->bs) { + bdrv_iostatus_set_err(bs, error); + } + } + return action; +} diff --git a/blockjob.h b/blockjob.h new file mode 100644 index 0000000000..930cc3c46a --- /dev/null +++ b/blockjob.h @@ -0,0 +1,243 @@ +/* + * Declarations for long-running block device operations + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCKJOB_H +#define BLOCKJOB_H 1 + +#include "block.h" + +/** + * BlockJobType: + * + * A class type for block job objects. + */ +typedef struct BlockJobType { + /** Derived BlockJob struct size */ + size_t instance_size; + + /** String describing the operation, part of query-block-jobs QMP API */ + const char *job_type; + + /** Optional callback for job types that support setting a speed limit */ + void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); +} BlockJobType; + +/** + * BlockJob: + * + * Long-running operation on a BlockDriverState. + */ +struct BlockJob { + /** The job type, including the job vtable. */ + const BlockJobType *job_type; + + /** The block device on which the job is operating. */ + BlockDriverState *bs; + + /** + * The coroutine that executes the job. If not NULL, it is + * reentered when busy is false and the job is cancelled. + */ + Coroutine *co; + + /** + * Set to true if the job should cancel itself. The flag must + * always be tested just before toggling the busy flag from false + * to true. After a job has been cancelled, it should only yield + * if #qemu_aio_wait will ("sooner or later") reenter the coroutine. + */ + bool cancelled; + + /** + * Set to true if the job is either paused, or will pause itself + * as soon as possible (if busy == true). + */ + bool paused; + + /** + * Set to false by the job while it is in a quiescent state, where + * no I/O is pending and the job has yielded on any condition + * that is not detected by #qemu_aio_wait, such as a timer. + */ + bool busy; + + /** Status that is published by the query-block-jobs QMP API */ + BlockDeviceIoStatus iostatus; + + /** Offset that is published by the query-block-jobs QMP API */ + int64_t offset; + + /** Length that is published by the query-block-jobs QMP API */ + int64_t len; + + /** Speed that was set with @block_job_set_speed. */ + int64_t speed; + + /** The completion function that will be called when the job completes. */ + BlockDriverCompletionFunc *cb; + + /** The opaque value that is passed to the completion function. */ + void *opaque; +}; + +/** + * block_job_create: + * @job_type: The class object for the newly-created job. + * @bs: The block + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Create a new long-running block device job and return it. The job + * will call @cb asynchronously when the job completes. Note that + * @bs may have been closed at the time the @cb it is called. If + * this is the case, the job may be reported as either cancelled or + * completed. + * + * This function is not part of the public job interface; it should be + * called from a wrapper that is specific to the job type. + */ +void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, + int64_t speed, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + +/** + * block_job_sleep_ns: + * @job: The job that calls the function. + * @clock: The clock to sleep on. + * @ns: How many nanoseconds to stop for. + * + * Put the job to sleep (assuming that it wasn't canceled) for @ns + * nanoseconds. Canceling the job will interrupt the wait immediately. + */ +void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns); + +/** + * block_job_complete: + * @job: The job being completed. + * @ret: The status code. + * + * Call the completion function that was registered at creation time, and + * free @job. + */ +void block_job_complete(BlockJob *job, int ret); + +/** + * block_job_set_speed: + * @job: The job to set the speed for. + * @speed: The new value + * @errp: Error object. + * + * Set a rate-limiting parameter for the job; the actual meaning may + * vary depending on the job type. + */ +void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); + +/** + * block_job_cancel: + * @job: The job to be canceled. + * + * Asynchronously cancel the specified job. + */ +void block_job_cancel(BlockJob *job); + +/** + * block_job_is_cancelled: + * @job: The job being queried. + * + * Returns whether the job is scheduled for cancellation. + */ +bool block_job_is_cancelled(BlockJob *job); + +/** + * block_job_query: + * @job: The job to get information about. + * + * Return information about a job. + */ +BlockJobInfo *block_job_query(BlockJob *job); + +/** + * block_job_pause: + * @job: The job to be paused. + * + * Asynchronously pause the specified job. + */ +void block_job_pause(BlockJob *job); + +/** + * block_job_resume: + * @job: The job to be resumed. + * + * Resume the specified job. + */ +void block_job_resume(BlockJob *job); + +/** + * block_job_is_paused: + * @job: The job being queried. + * + * Returns whether the job is currently paused, or will pause + * as soon as it reaches a sleeping point. + */ +bool block_job_is_paused(BlockJob *job); + +/** + * block_job_cancel_sync: + * @job: The job to be canceled. + * + * Synchronously cancel the job. The completion callback is called + * before the function returns. The job may actually complete + * instead of canceling itself; the circumstances under which this + * happens depend on the kind of job that is active. + * + * Returns the return value from the job if the job actually completed + * during the call, or -ECANCELED if it was canceled. + */ +int block_job_cancel_sync(BlockJob *job); + +/** + * block_job_iostatus_reset: + * @job: The job whose I/O status should be reset. + * + * Reset I/O status on @job. + */ +void block_job_iostatus_reset(BlockJob *job); + +/** + * block_job_error_action: + * @job: The job to signal an error for. + * @bs: The block device on which to set an I/O error. + * @on_err: The error action setting. + * @is_read: Whether the operation was a read. + * @error: The error that was reported. + * + * Report an I/O error for a block job and possibly stop the VM. Return the + * action that was selected based on @on_err and @error. + */ +BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, + BlockdevOnError on_err, + int is_read, int error); +#endif @@ -219,6 +219,7 @@ want_tools="yes" libiscsi="" coroutine="" seccomp="" +glusterfs="" # parse CC options first for opt do @@ -856,6 +857,10 @@ for opt do ;; --disable-seccomp) seccomp="no" ;; + --disable-glusterfs) glusterfs="no" + ;; + --enable-glusterfs) glusterfs="yes" + ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; esac @@ -1128,6 +1133,8 @@ echo " --disable-seccomp disable seccomp support" echo " --enable-seccomp enables seccomp support" echo " --with-coroutine=BACKEND coroutine backend. Supported options:" echo " gthread, ucontext, sigaltstack, windows" +echo " --enable-glusterfs enable GlusterFS backend" +echo " --disable-glusterfs disable GlusterFS backend" echo "" echo "NOTE: The object files are built at the place where configure is launched" exit 1 @@ -2303,6 +2310,29 @@ EOF fi fi +########################################## +# glusterfs probe +if test "$glusterfs" != "no" ; then + cat > $TMPC <<EOF +#include <glusterfs/api/glfs.h> +int main(void) { + (void) glfs_new("volume"); + return 0; +} +EOF + glusterfs_libs="-lgfapi -lgfrpc -lgfxdr" + if compile_prog "" "$glusterfs_libs" ; then + glusterfs=yes + libs_tools="$glusterfs_libs $libs_tools" + libs_softmmu="$glusterfs_libs $libs_softmmu" + else + if test "$glusterfs" = "yes" ; then + feature_not_found "GlusterFS backend support" + fi + glusterfs=no + fi +fi + # # Check for xxxat() functions when we are building linux-user # emulator. This is done because older glibc versions don't @@ -2748,10 +2778,10 @@ fi # check for usbredirparser for usb network redirection support if test "$usb_redir" != "no" ; then - if $pkg_config --atleast-version=0.5 libusbredirparser >/dev/null 2>&1 ; then + if $pkg_config --atleast-version=0.5 libusbredirparser-0.5 >/dev/null 2>&1 ; then usb_redir="yes" - usb_redir_cflags=$($pkg_config --cflags libusbredirparser 2>/dev/null) - usb_redir_libs=$($pkg_config --libs libusbredirparser 2>/dev/null) + usb_redir_cflags=$($pkg_config --cflags libusbredirparser-0.5 2>/dev/null) + usb_redir_libs=$($pkg_config --libs libusbredirparser-0.5 2>/dev/null) QEMU_CFLAGS="$QEMU_CFLAGS $usb_redir_cflags" libs_softmmu="$libs_softmmu $usb_redir_libs" else @@ -3170,6 +3200,7 @@ echo "libiscsi support $libiscsi" echo "build guest agent $guest_agent" echo "seccomp support $seccomp" echo "coroutine backend $coroutine_backend" +echo "GlusterFS support $glusterfs" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -3516,6 +3547,10 @@ if test "$has_environ" = "yes" ; then echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak fi +if test "$glusterfs" = "yes" ; then + echo "CONFIG_GLUSTERFS=y" >> $config_host_mak +fi + # USB host support case "$usb" in linux) @@ -3694,7 +3729,6 @@ TARGET_ABI_DIR="" case "$target_arch2" in i386) - target_phys_bits=64 ;; x86_64) TARGET_BASE_ARCH=i386 @@ -3702,7 +3736,6 @@ case "$target_arch2" in target_long_alignment=8 ;; alpha) - target_phys_bits=64 target_long_alignment=8 target_nptl="yes" ;; @@ -3711,22 +3744,18 @@ case "$target_arch2" in bflt="yes" target_nptl="yes" gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml" - target_phys_bits=64 target_llong_alignment=4 target_libs_softmmu="$fdt_libs" ;; cris) target_nptl="yes" - target_phys_bits=32 ;; lm32) - target_phys_bits=32 target_libs_softmmu="$opengl_libs" ;; m68k) bflt="yes" gdb_xml_files="cf-core.xml cf-fp.xml" - target_phys_bits=32 target_int_alignment=2 target_long_alignment=2 target_llong_alignment=2 @@ -3735,36 +3764,30 @@ case "$target_arch2" in TARGET_ARCH=microblaze bflt="yes" target_nptl="yes" - target_phys_bits=32 target_libs_softmmu="$fdt_libs" ;; mips|mipsel) TARGET_ARCH=mips echo "TARGET_ABI_MIPSO32=y" >> $config_target_mak target_nptl="yes" - target_phys_bits=64 ;; mipsn32|mipsn32el) TARGET_ARCH=mipsn32 TARGET_BASE_ARCH=mips echo "TARGET_ABI_MIPSN32=y" >> $config_target_mak - target_phys_bits=64 ;; mips64|mips64el) TARGET_ARCH=mips64 TARGET_BASE_ARCH=mips echo "TARGET_ABI_MIPSN64=y" >> $config_target_mak - target_phys_bits=64 target_long_alignment=8 ;; or32) TARGET_ARCH=openrisc TARGET_BASE_ARCH=openrisc - target_phys_bits=32 ;; ppc) gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml" - target_phys_bits=64 target_nptl="yes" target_libs_softmmu="$fdt_libs" ;; @@ -3772,7 +3795,6 @@ case "$target_arch2" in TARGET_BASE_ARCH=ppc TARGET_ABI_DIR=ppc gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml" - target_phys_bits=64 target_nptl="yes" target_libs_softmmu="$fdt_libs" ;; @@ -3780,7 +3802,6 @@ case "$target_arch2" in TARGET_BASE_ARCH=ppc TARGET_ABI_DIR=ppc gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml" - target_phys_bits=64 target_long_alignment=8 target_libs_softmmu="$fdt_libs" ;; @@ -3790,21 +3811,17 @@ case "$target_arch2" in TARGET_ABI_DIR=ppc echo "TARGET_ABI32=y" >> $config_target_mak gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml" - target_phys_bits=64 target_libs_softmmu="$fdt_libs" ;; sh4|sh4eb) TARGET_ARCH=sh4 bflt="yes" target_nptl="yes" - target_phys_bits=32 ;; sparc) - target_phys_bits=64 ;; sparc64) TARGET_BASE_ARCH=sparc - target_phys_bits=64 target_long_alignment=8 ;; sparc32plus) @@ -3812,11 +3829,9 @@ case "$target_arch2" in TARGET_BASE_ARCH=sparc TARGET_ABI_DIR=sparc echo "TARGET_ABI32=y" >> $config_target_mak - target_phys_bits=64 ;; s390x) target_nptl="yes" - target_phys_bits=64 target_long_alignment=8 ;; unicore32) @@ -3824,7 +3839,6 @@ case "$target_arch2" in ;; xtensa|xtensaeb) TARGET_ARCH=xtensa - target_phys_bits=32 ;; *) echo "Unsupported target CPU" @@ -3859,7 +3873,6 @@ echo "TARGET_ABI_DIR=$TARGET_ABI_DIR" >> $config_target_mak case "$target_arch2" in i386|x86_64) if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then - target_phys_bits=64 echo "CONFIG_XEN=y" >> $config_target_mak if test "$xen_pci_passthrough" = yes; then echo "CONFIG_XEN_PCI_PASSTHROUGH=y" >> "$config_target_mak" @@ -3899,11 +3912,10 @@ if test "$target_bigendian" = "yes" ; then echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak fi if test "$target_softmmu" = "yes" ; then - echo "TARGET_PHYS_ADDR_BITS=$target_phys_bits" >> $config_target_mak echo "CONFIG_SOFTMMU=y" >> $config_target_mak echo "LIBS+=$libs_softmmu $target_libs_softmmu" >> $config_target_mak - echo "HWDIR=../libhw$target_phys_bits" >> $config_target_mak - echo "subdir-$target: subdir-libhw$target_phys_bits" >> $config_host_mak + echo "HWDIR=../libhw" >> $config_target_mak + echo "subdir-$target: subdir-libhw" >> $config_host_mak if test "$smartcard_nss" = "yes" ; then echo "subdir-$target: subdir-libcacard" >> $config_host_mak fi @@ -4145,11 +4157,8 @@ for rom in seabios vgabios ; do echo "LD=$ld" >> $config_mak done -for hwlib in 32 64; do - d=libhw$hwlib - symlink "$source_path/Makefile.hw" "$d/Makefile" - echo "QEMU_CFLAGS+=-DTARGET_PHYS_ADDR_BITS=$hwlib" > $d/config.mak -done +d=libhw +symlink "$source_path/Makefile.hw" "$d/Makefile" d=libuser symlink "$source_path/Makefile.user" "$d/Makefile" diff --git a/cpu-common.h b/cpu-common.h index 85548de5ea..c0d27afd82 100644 --- a/cpu-common.h +++ b/cpu-common.h @@ -21,7 +21,7 @@ enum device_endian { }; /* address in the RAM (different from a physical address) */ -#if defined(CONFIG_XEN_BACKEND) && TARGET_PHYS_ADDR_BITS == 64 +#if defined(CONFIG_XEN_BACKEND) typedef uint64_t ram_addr_t; # define RAM_ADDR_MAX UINT64_MAX # define RAM_ADDR_FMT "%" PRIx64 @@ -31,7 +31,7 @@ struct QEMUSGList { DMAContext *dma; }; -#if defined(TARGET_PHYS_ADDR_BITS) +#ifndef CONFIG_USER_ONLY /* * When an IOMMU is present, bus addresses become distinct from @@ -100,18 +100,11 @@ static void dump_error(DumpState *s, const char *reason) static int fd_write_vmcore(void *buf, size_t size, void *opaque) { DumpState *s = opaque; - int fd = s->fd; - size_t writen_size; + size_t written_size; - /* The fd may be passed from user, and it can be non-blocked */ - while (size) { - writen_size = qemu_write_full(fd, buf, size); - if (writen_size != size && errno != EAGAIN) { - return -1; - } - - buf += writen_size; - size -= writen_size; + written_size = qemu_write_full(s->fd, buf, size); + if (written_size != size) { + return -1; } return 0; @@ -836,9 +829,8 @@ void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin, #if !defined(WIN32) if (strstart(file, "fd:", &p)) { - fd = monitor_get_fd(cur_mon, p); + fd = monitor_get_fd(cur_mon, p, errp); if (fd == -1) { - error_set(errp, QERR_FD_NOT_FOUND, p); return; } } diff --git a/hmp-commands.hx b/hmp-commands.hx index ed67e997fd..e0b537d0cc 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -99,9 +99,10 @@ ETEXI { .name = "block_job_cancel", - .args_type = "device:B", - .params = "device", - .help = "stop an active background block operation", + .args_type = "force:-f,device:B", + .params = "[-f] device", + .help = "stop an active background block operation (use -f" + "\n\t\t\t if the operation is currently paused)", .mhandler.cmd = hmp_block_job_cancel, }, @@ -112,6 +113,34 @@ Stop an active block streaming operation. ETEXI { + .name = "block_job_pause", + .args_type = "device:B", + .params = "device", + .help = "pause an active background block operation", + .mhandler.cmd = hmp_block_job_pause, + }, + +STEXI +@item block_job_pause +@findex block_job_pause +Pause an active block streaming operation. +ETEXI + + { + .name = "block_job_resume", + .args_type = "device:B", + .params = "device", + .help = "resume a paused background block operation", + .mhandler.cmd = hmp_block_job_resume, + }, + +STEXI +@item block_job_resume +@findex block_job_resume +Resume a paused block streaming operation. +ETEXI + + { .name = "eject", .args_type = "force:-f,device:B", .params = "[-f] device", @@ -914,12 +943,11 @@ ETEXI #if defined(CONFIG_HAVE_CORE_DUMP) { .name = "dump-guest-memory", - .args_type = "paging:-p,protocol:s,begin:i?,length:i?", - .params = "[-p] protocol [begin] [length]", + .args_type = "paging:-p,filename:F,begin:i?,length:i?", + .params = "[-p] filename [begin] [length]", .help = "dump guest memory to file" "\n\t\t\t begin(optional): the starting physical address" "\n\t\t\t length(optional): the memory size, in bytes", - .user_print = monitor_user_noop, .mhandler.cmd = hmp_dump_guest_memory, }, @@ -929,8 +957,7 @@ STEXI @findex dump-guest-memory Dump guest memory to @var{protocol}. The file can be processed with crash or gdb. - protocol: destination file(started with "file:") or destination file - descriptor (started with "fd:") + filename: dump file name paging: do paging to get guest's memory mapping begin: the starting physical address. It's optional, and should be specified with length together. @@ -930,7 +930,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict) int64_t speed = qdict_get_try_int(qdict, "speed", 0); qmp_block_stream(device, base != NULL, base, - qdict_haskey(qdict, "speed"), speed, &error); + qdict_haskey(qdict, "speed"), speed, + BLOCKDEV_ON_ERROR_REPORT, true, &error); hmp_handle_error(mon, &error); } @@ -950,8 +951,29 @@ void hmp_block_job_cancel(Monitor *mon, const QDict *qdict) { Error *error = NULL; const char *device = qdict_get_str(qdict, "device"); + bool force = qdict_get_try_bool(qdict, "force", 0); - qmp_block_job_cancel(device, &error); + qmp_block_job_cancel(device, true, force, &error); + + hmp_handle_error(mon, &error); +} + +void hmp_block_job_pause(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_pause(device, &error); + + hmp_handle_error(mon, &error); +} + +void hmp_block_job_resume(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_resume(device, &error); hmp_handle_error(mon, &error); } @@ -1042,11 +1064,12 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict) { Error *errp = NULL; int paging = qdict_get_try_bool(qdict, "paging", 0); - const char *file = qdict_get_str(qdict, "protocol"); + const char *file = qdict_get_str(qdict, "filename"); bool has_begin = qdict_haskey(qdict, "begin"); bool has_length = qdict_haskey(qdict, "length"); int64_t begin = 0; int64_t length = 0; + char *prot; if (has_begin) { begin = qdict_get_int(qdict, "begin"); @@ -1055,9 +1078,12 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict) length = qdict_get_int(qdict, "length"); } - qmp_dump_guest_memory(paging, file, has_begin, begin, has_length, length, + prot = g_strconcat("file:", file, NULL); + + qmp_dump_guest_memory(paging, prot, has_begin, begin, has_length, length, &errp); hmp_handle_error(mon, &errp); + g_free(prot); } void hmp_netdev_add(Monitor *mon, const QDict *qdict) @@ -1109,13 +1135,13 @@ void hmp_closefd(Monitor *mon, const QDict *qdict) void hmp_send_key(Monitor *mon, const QDict *qdict) { const char *keys = qdict_get_str(qdict, "keys"); - QKeyCodeList *keylist, *head = NULL, *tmp = NULL; + KeyValueList *keylist, *head = NULL, *tmp = NULL; int has_hold_time = qdict_haskey(qdict, "hold-time"); int hold_time = qdict_get_try_int(qdict, "hold-time", -1); Error *err = NULL; char keyname_buf[16]; char *separator; - int keyname_len, idx; + int keyname_len; while (1) { separator = strchr(keys, '-'); @@ -1129,15 +1155,8 @@ void hmp_send_key(Monitor *mon, const QDict *qdict) } keyname_buf[keyname_len] = 0; - idx = index_from_key(keyname_buf); - if (idx == Q_KEY_CODE_MAX) { - monitor_printf(mon, "invalid parameter: %s\n", keyname_buf); - break; - } - keylist = g_malloc0(sizeof(*keylist)); - keylist->value = idx; - keylist->next = NULL; + keylist->value = g_malloc0(sizeof(*keylist->value)); if (!head) { head = keylist; @@ -1147,17 +1166,39 @@ void hmp_send_key(Monitor *mon, const QDict *qdict) } tmp = keylist; + if (strstart(keyname_buf, "0x", NULL)) { + char *endp; + int value = strtoul(keyname_buf, &endp, 0); + if (*endp != '\0') { + goto err_out; + } + keylist->value->kind = KEY_VALUE_KIND_NUMBER; + keylist->value->number = value; + } else { + int idx = index_from_key(keyname_buf); + if (idx == Q_KEY_CODE_MAX) { + goto err_out; + } + keylist->value->kind = KEY_VALUE_KIND_QCODE; + keylist->value->qcode = idx; + } + if (!separator) { break; } keys = separator + 1; } - if (idx != Q_KEY_CODE_MAX) { - qmp_send_key(head, has_hold_time, hold_time, &err); - } + qmp_send_key(head, has_hold_time, hold_time, &err); hmp_handle_error(mon, &err); - qapi_free_QKeyCodeList(head); + +out: + qapi_free_KeyValueList(head); + return; + +err_out: + monitor_printf(mon, "invalid parameter: %s\n", keyname_buf); + goto out; } void hmp_screen_dump(Monitor *mon, const QDict *qdict) @@ -64,6 +64,8 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict); void hmp_block_stream(Monitor *mon, const QDict *qdict); void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict); void hmp_block_job_cancel(Monitor *mon, const QDict *qdict); +void hmp_block_job_pause(Monitor *mon, const QDict *qdict); +void hmp_block_job_resume(Monitor *mon, const QDict *qdict); void hmp_migrate(Monitor *mon, const QDict *qdict); void hmp_device_del(Monitor *mon, const QDict *qdict); void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict); @@ -1994,11 +1994,11 @@ static int fdctrl_connect_drives(FDCtrl *fdctrl) drive->fdctrl = fdctrl; if (drive->bs) { - if (bdrv_get_on_error(drive->bs, 0) != BLOCK_ERR_STOP_ENOSPC) { + if (bdrv_get_on_error(drive->bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) { error_report("fdc doesn't support drive option werror"); return -1; } - if (bdrv_get_on_error(drive->bs, 1) != BLOCK_ERR_REPORT) { + if (bdrv_get_on_error(drive->bs, 1) != BLOCKDEV_ON_ERROR_REPORT) { error_report("fdc doesn't support drive option rerror"); return -1; } @@ -4,7 +4,7 @@ #include "qemu-common.h" -#if defined(TARGET_PHYS_ADDR_BITS) && !defined(NEED_CPU_H) +#if !defined(CONFIG_USER_ONLY) && !defined(NEED_CPU_H) #include "cpu-common.h" #endif diff --git a/hw/ide/core.c b/hw/ide/core.c index d6fb69c634..d683a8cc84 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -556,32 +556,22 @@ void ide_dma_error(IDEState *s) static int ide_handle_rw_error(IDEState *s, int error, int op) { - int is_read = (op & BM_STATUS_RETRY_READ); - BlockErrorAction action = bdrv_get_on_error(s->bs, is_read); + bool is_read = (op & BM_STATUS_RETRY_READ) != 0; + BlockErrorAction action = bdrv_get_error_action(s->bs, is_read, error); - if (action == BLOCK_ERR_IGNORE) { - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read); - return 0; - } - - if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC) - || action == BLOCK_ERR_STOP_ANY) { + if (action == BDRV_ACTION_STOP) { s->bus->dma->ops->set_unit(s->bus->dma, s->unit); s->bus->error_status = op; - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read); - vm_stop(RUN_STATE_IO_ERROR); - bdrv_iostatus_set_err(s->bs, error); - } else { + } else if (action == BDRV_ACTION_REPORT) { if (op & BM_STATUS_DMA_RETRY) { dma_buf_commit(s); ide_dma_error(s); } else { ide_rw_error(s); } - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read); } - - return 1; + bdrv_error_action(s->bs, action, is_read, error); + return action != BDRV_ACTION_IGNORE; } void ide_dma_cb(void *opaque, int ret) diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 88c0942e34..644533f777 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -188,7 +188,7 @@ static void bmdma_restart_bh(void *opaque) { BMDMAState *bm = opaque; IDEBus *bus = bm->bus; - int is_read; + bool is_read; int error_status; qemu_bh_delete(bm->bh); @@ -198,7 +198,7 @@ static void bmdma_restart_bh(void *opaque) return; } - is_read = !!(bus->error_status & BM_STATUS_RETRY_READ); + is_read = (bus->error_status & BM_STATUS_RETRY_READ) != 0; /* The error status must be cleared before resubmitting the request: The * request may fail again, and this case can only be distinguished if the diff --git a/hw/intel-hda.c b/hw/intel-hda.c index 127e81888b..d8e1b23a60 100644 --- a/hw/intel-hda.c +++ b/hw/intel-hda.c @@ -210,13 +210,7 @@ static target_phys_addr_t intel_hda_addr(uint32_t lbase, uint32_t ubase) { target_phys_addr_t addr; -#if TARGET_PHYS_ADDR_BITS == 32 - addr = lbase; -#else - addr = ubase; - addr <<= 32; - addr |= lbase; -#endif + addr = ((uint64_t)ubase << 32) | lbase; return addr; } diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c index 05b93d9a51..7a0998c518 100644 --- a/hw/kvm/pci-assign.c +++ b/hw/kvm/pci-assign.c @@ -579,15 +579,9 @@ static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg, snprintf(name, sizeof(name), "%sconfig", dir); if (pci_dev->configfd_name && *pci_dev->configfd_name) { - if (qemu_isdigit(pci_dev->configfd_name[0])) { - dev->config_fd = strtol(pci_dev->configfd_name, NULL, 0); - } else { - dev->config_fd = monitor_get_fd(cur_mon, pci_dev->configfd_name); - if (dev->config_fd < 0) { - error_report("%s: (%s) unkown", __func__, - pci_dev->configfd_name); - return 1; - } + dev->config_fd = monitor_handle_fd_param(cur_mon, pci_dev->configfd_name); + if (dev->config_fd < 0) { + return 1; } } else { dev->config_fd = open(name, O_RDWR); diff --git a/hw/rtl8139.c b/hw/rtl8139.c index 844f1b8c3f..b7c82ee027 100644 --- a/hw/rtl8139.c +++ b/hw/rtl8139.c @@ -774,11 +774,7 @@ static void rtl8139_write_buffer(RTL8139State *s, const void *buf, int size) #define MIN_BUF_SIZE 60 static inline dma_addr_t rtl8139_addr64(uint32_t low, uint32_t high) { -#if TARGET_PHYS_ADDR_BITS > 32 - return low | ((target_phys_addr_t)high << 32); -#else - return low; -#endif + return low | ((uint64_t)high << 32); } /* Workaround for buggy guest driver such as linux who allocates rx diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 95e91585e6..99bb02ebf8 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -386,23 +386,11 @@ static void scsi_read_data(SCSIRequest *req) */ static int scsi_handle_rw_error(SCSIDiskReq *r, int error) { - int is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV); + bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - BlockErrorAction action = bdrv_get_on_error(s->qdev.conf.bs, is_read); + BlockErrorAction action = bdrv_get_error_action(s->qdev.conf.bs, is_read, error); - if (action == BLOCK_ERR_IGNORE) { - bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read); - return 0; - } - - if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC) - || action == BLOCK_ERR_STOP_ANY) { - - bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read); - vm_stop(RUN_STATE_IO_ERROR); - bdrv_iostatus_set_err(s->qdev.conf.bs, error); - scsi_req_retry(&r->req); - } else { + if (action == BDRV_ACTION_REPORT) { switch (error) { case ENOMEDIUM: scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); @@ -417,9 +405,12 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error) scsi_check_condition(r, SENSE_CODE(IO_ERROR)); break; } - bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_REPORT, is_read); } - return 1; + bdrv_error_action(s->qdev.conf.bs, action, is_read, error); + if (action == BDRV_ACTION_STOP) { + scsi_req_retry(&r->req); + } + return action != BDRV_ACTION_IGNORE; } static void scsi_write_complete(void * opaque, int ret) diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c index a5eb663ecf..d9045341ba 100644 --- a/hw/scsi-generic.c +++ b/hw/scsi-generic.c @@ -400,11 +400,11 @@ static int scsi_generic_initfn(SCSIDevice *s) return -1; } - if (bdrv_get_on_error(s->conf.bs, 0) != BLOCK_ERR_STOP_ENOSPC) { + if (bdrv_get_on_error(s->conf.bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) { error_report("Device doesn't support drive option werror"); return -1; } - if (bdrv_get_on_error(s->conf.bs, 1) != BLOCK_ERR_REPORT) { + if (bdrv_get_on_error(s->conf.bs, 1) != BLOCKDEV_ON_ERROR_REPORT) { error_report("Device doesn't support drive option rerror"); return -1; } diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index 6a5da8413f..8bdb806b9b 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -373,6 +373,7 @@ struct EHCIQueue { uint32_t seen; uint64_t ts; int async; + int transact_ctr; /* cached data from guest - needs to be flushed * when guest removes an entry (doorbell, handshake sequence) @@ -1837,6 +1838,11 @@ static EHCIQueue *ehci_state_fetchqh(EHCIState *ehci, int async) } q->qh = qh; + q->transact_ctr = get_field(q->qh.epcap, QH_EPCAP_MULT); + if (q->transact_ctr == 0) { /* Guest bug in some versions of windows */ + q->transact_ctr = 4; + } + if (q->dev == NULL) { q->dev = ehci_find_device(q->ehci, devaddr); } @@ -2014,11 +2020,8 @@ static int ehci_state_fetchqtd(EHCIQueue *q) } else if (p != NULL) { switch (p->async) { case EHCI_ASYNC_NONE: - /* Should never happen packet should at least be initialized */ - assert(0); - break; case EHCI_ASYNC_INITIALIZED: - /* Previously nacked packet (likely interrupt ep) */ + /* Not yet executed (MULT), or previously nacked (int) packet */ ehci_set_state(q->ehci, q->async, EST_EXECUTE); break; case EHCI_ASYNC_INFLIGHT: @@ -2107,15 +2110,12 @@ static int ehci_state_execute(EHCIQueue *q) // TODO verify enough time remains in the uframe as in 4.4.1.1 // TODO write back ptr to async list when done or out of time - // TODO Windows does not seem to ever set the MULT field - if (!q->async) { - int transactCtr = get_field(q->qh.epcap, QH_EPCAP_MULT); - if (!transactCtr) { - ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH); - again = 1; - goto out; - } + /* 4.10.3, bottom of page 82, go horizontal on transaction counter == 0 */ + if (!q->async && q->transact_ctr == 0) { + ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH); + again = 1; + goto out; } if (q->async) { @@ -2132,7 +2132,11 @@ static int ehci_state_execute(EHCIQueue *q) trace_usb_ehci_packet_action(p->queue, p, "async"); p->async = EHCI_ASYNC_INFLIGHT; ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH); - again = (ehci_fill_queue(p) == USB_RET_PROCERR) ? -1 : 1; + if (q->async) { + again = (ehci_fill_queue(p) == USB_RET_PROCERR) ? -1 : 1; + } else { + again = 1; + } goto out; } @@ -2152,13 +2156,9 @@ static int ehci_state_executing(EHCIQueue *q) ehci_execute_complete(q); - // 4.10.3 - if (!q->async) { - int transactCtr = get_field(q->qh.epcap, QH_EPCAP_MULT); - transactCtr--; - set_field(&q->qh.epcap, transactCtr, QH_EPCAP_MULT); - // 4.10.3, bottom of page 82, should exit this state when transaction - // counter decrements to 0 + /* 4.10.3 */ + if (!q->async && q->transact_ctr > 0) { + q->transact_ctr--; } /* 4.10.5 */ diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index e0ca69044a..e79a8724c5 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -37,12 +37,12 @@ #define FIXME() do { fprintf(stderr, "FIXME %s:%d\n", \ __func__, __LINE__); abort(); } while (0) -#define MAXPORTS_2 8 -#define MAXPORTS_3 8 +#define MAXPORTS_2 15 +#define MAXPORTS_3 15 #define MAXPORTS (MAXPORTS_2+MAXPORTS_3) -#define MAXSLOTS MAXPORTS -#define MAXINTRS MAXPORTS +#define MAXSLOTS 64 +#define MAXINTRS 16 #define TD_QUEUE 24 @@ -285,6 +285,8 @@ typedef enum TRBCCode { #define SLOT_CONTEXT_ENTRIES_MASK 0x1f #define SLOT_CONTEXT_ENTRIES_SHIFT 27 +typedef struct XHCIState XHCIState; + typedef enum EPType { ET_INVALID = 0, ET_ISO_OUT, @@ -303,15 +305,15 @@ typedef struct XHCIRing { } XHCIRing; typedef struct XHCIPort { + XHCIState *xhci; uint32_t portsc; uint32_t portnr; USBPort *uport; uint32_t speedmask; + char name[16]; + MemoryRegion mem; } XHCIPort; -struct XHCIState; -typedef struct XHCIState XHCIState; - typedef struct XHCITransfer { XHCIState *xhci; USBPacket packet; @@ -363,7 +365,7 @@ typedef struct XHCIEPContext { typedef struct XHCISlot { bool enabled; dma_addr_t ctx; - unsigned int port; + USBPort *uport; unsigned int devaddr; XHCIEPContext * eps[31]; } XHCISlot; @@ -1230,7 +1232,7 @@ static TRBCCode xhci_reset_ep(XHCIState *xhci, unsigned int slotid, ep |= 0x80; } - dev = xhci->ports[xhci->slots[slotid-1].port-1].uport->dev; + dev = xhci->slots[slotid-1].uport->dev; if (!dev) { return CC_USB_TRANSACTION_ERROR; } @@ -1412,18 +1414,9 @@ static void xhci_stall_ep(XHCITransfer *xfer) static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer, XHCIEPContext *epctx); -static USBDevice *xhci_find_device(XHCIPort *port, uint8_t addr) -{ - if (!(port->portsc & PORTSC_PED)) { - return NULL; - } - return usb_find_device(port->uport, addr); -} - static int xhci_setup_packet(XHCITransfer *xfer) { XHCIState *xhci = xfer->xhci; - XHCIPort *port; USBDevice *dev; USBEndpoint *ep; int dir; @@ -1434,13 +1427,12 @@ static int xhci_setup_packet(XHCITransfer *xfer) ep = xfer->packet.ep; dev = ep->dev; } else { - port = &xhci->ports[xhci->slots[xfer->slotid-1].port-1]; - dev = xhci_find_device(port, xhci->slots[xfer->slotid-1].devaddr); - if (!dev) { - fprintf(stderr, "xhci: slot %d port %d has no device\n", - xfer->slotid, xhci->slots[xfer->slotid-1].port); + if (!xhci->slots[xfer->slotid-1].uport) { + fprintf(stderr, "xhci: slot %d has no device\n", + xfer->slotid); return -1; } + dev = xhci->slots[xfer->slotid-1].uport->dev; ep = usb_ep_get(dev, dir, xfer->epid >> 1); } @@ -1772,7 +1764,7 @@ static TRBCCode xhci_enable_slot(XHCIState *xhci, unsigned int slotid) trace_usb_xhci_slot_enable(slotid); assert(slotid >= 1 && slotid <= MAXSLOTS); xhci->slots[slotid-1].enabled = 1; - xhci->slots[slotid-1].port = 0; + xhci->slots[slotid-1].uport = NULL; memset(xhci->slots[slotid-1].eps, 0, sizeof(XHCIEPContext*)*31); return CC_SUCCESS; @@ -1795,17 +1787,42 @@ static TRBCCode xhci_disable_slot(XHCIState *xhci, unsigned int slotid) return CC_SUCCESS; } +static USBPort *xhci_lookup_uport(XHCIState *xhci, uint32_t *slot_ctx) +{ + USBPort *uport; + char path[32]; + int i, pos, port; + + port = (slot_ctx[1]>>16) & 0xFF; + port = xhci->ports[port-1].uport->index+1; + pos = snprintf(path, sizeof(path), "%d", port); + for (i = 0; i < 5; i++) { + port = (slot_ctx[0] >> 4*i) & 0x0f; + if (!port) { + break; + } + pos += snprintf(path + pos, sizeof(path) - pos, ".%d", port); + } + + QTAILQ_FOREACH(uport, &xhci->bus.used, next) { + if (strcmp(uport->path, path) == 0) { + return uport; + } + } + return NULL; +} + static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid, uint64_t pictx, bool bsr) { XHCISlot *slot; + USBPort *uport; USBDevice *dev; dma_addr_t ictx, octx, dcbaap; uint64_t poctx; uint32_t ictl_ctx[2]; uint32_t slot_ctx[4]; uint32_t ep0_ctx[5]; - unsigned int port; int i; TRBCCode res; @@ -1837,27 +1854,28 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid, DPRINTF("xhci: input ep0 context: %08x %08x %08x %08x %08x\n", ep0_ctx[0], ep0_ctx[1], ep0_ctx[2], ep0_ctx[3], ep0_ctx[4]); - port = (slot_ctx[1]>>16) & 0xFF; - dev = xhci->ports[port-1].uport->dev; - - if (port < 1 || port > xhci->numports) { - fprintf(stderr, "xhci: bad port %d\n", port); + uport = xhci_lookup_uport(xhci, slot_ctx); + if (uport == NULL) { + fprintf(stderr, "xhci: port not found\n"); return CC_TRB_ERROR; - } else if (!dev) { - fprintf(stderr, "xhci: port %d not connected\n", port); + } + + dev = uport->dev; + if (!dev) { + fprintf(stderr, "xhci: port %s not connected\n", uport->path); return CC_USB_TRANSACTION_ERROR; } for (i = 0; i < MAXSLOTS; i++) { - if (xhci->slots[i].port == port) { - fprintf(stderr, "xhci: port %d already assigned to slot %d\n", - port, i+1); + if (xhci->slots[i].uport == uport) { + fprintf(stderr, "xhci: port %s already assigned to slot %d\n", + uport->path, i+1); return CC_TRB_ERROR; } } slot = &xhci->slots[slotid-1]; - slot->port = port; + slot->uport = uport; slot->ctx = octx; if (bsr) { @@ -2414,20 +2432,14 @@ static uint64_t xhci_cap_read(void *ptr, target_phys_addr_t reg, unsigned size) return ret; } -static uint32_t xhci_port_read(XHCIState *xhci, uint32_t reg) +static uint64_t xhci_port_read(void *ptr, target_phys_addr_t reg, unsigned size) { - uint32_t port = reg >> 4; + XHCIPort *port = ptr; uint32_t ret; - if (port >= xhci->numports) { - fprintf(stderr, "xhci_port_read: port %d out of bounds\n", port); - ret = 0; - goto out; - } - - switch (reg & 0xf) { + switch (reg) { case 0x00: /* PORTSC */ - ret = xhci->ports[port].portsc; + ret = port->portsc; break; case 0x04: /* PORTPMSC */ case 0x08: /* PORTLI */ @@ -2436,30 +2448,25 @@ static uint32_t xhci_port_read(XHCIState *xhci, uint32_t reg) case 0x0c: /* reserved */ default: fprintf(stderr, "xhci_port_read (port %d): reg 0x%x unimplemented\n", - port, reg); + port->portnr, (uint32_t)reg); ret = 0; } -out: - trace_usb_xhci_port_read(port, reg & 0x0f, ret); + trace_usb_xhci_port_read(port->portnr, reg, ret); return ret; } -static void xhci_port_write(XHCIState *xhci, uint32_t reg, uint32_t val) +static void xhci_port_write(void *ptr, target_phys_addr_t reg, + uint64_t val, unsigned size) { - uint32_t port = reg >> 4; + XHCIPort *port = ptr; uint32_t portsc; - trace_usb_xhci_port_write(port, reg & 0x0f, val); - - if (port >= xhci->numports) { - fprintf(stderr, "xhci_port_read: port %d out of bounds\n", port); - return; - } + trace_usb_xhci_port_write(port->portnr, reg, val); - switch (reg & 0xf) { + switch (reg) { case 0x00: /* PORTSC */ - portsc = xhci->ports[port].portsc; + portsc = port->portsc; /* write-1-to-clear bits*/ portsc &= ~(val & (PORTSC_CSC|PORTSC_PEC|PORTSC_WRC|PORTSC_OCC| PORTSC_PRC|PORTSC_PLC|PORTSC_CEC)); @@ -2474,16 +2481,16 @@ static void xhci_port_write(XHCIState *xhci, uint32_t reg, uint32_t val) /* write-1-to-start bits */ if (val & PORTSC_PR) { DPRINTF("xhci: port %d reset\n", port); - usb_device_reset(xhci->ports[port].uport->dev); + usb_device_reset(port->uport->dev); portsc |= PORTSC_PRC | PORTSC_PED; } - xhci->ports[port].portsc = portsc; + port->portsc = portsc; break; case 0x04: /* PORTPMSC */ case 0x08: /* PORTLI */ default: fprintf(stderr, "xhci_port_write (port %d): reg 0x%x unimplemented\n", - port, reg); + port->portnr, (uint32_t)reg); } } @@ -2492,10 +2499,6 @@ static uint64_t xhci_oper_read(void *ptr, target_phys_addr_t reg, unsigned size) XHCIState *xhci = ptr; uint32_t ret; - if (reg >= 0x400) { - return xhci_port_read(xhci, reg - 0x400); - } - switch (reg) { case 0x00: /* USBCMD */ ret = xhci->usbcmd; @@ -2538,11 +2541,6 @@ static void xhci_oper_write(void *ptr, target_phys_addr_t reg, { XHCIState *xhci = ptr; - if (reg >= 0x400) { - xhci_port_write(xhci, reg - 0x400, val); - return; - } - trace_usb_xhci_oper_write(reg, val); switch (reg) { @@ -2761,6 +2759,14 @@ static const MemoryRegionOps xhci_oper_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; +static const MemoryRegionOps xhci_port_ops = { + .read = xhci_port_read, + .write = xhci_port_write, + .valid.min_access_size = 4, + .valid.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + static const MemoryRegionOps xhci_runtime_ops = { .read = xhci_runtime_read, .write = xhci_runtime_write, @@ -2821,12 +2827,20 @@ static void xhci_complete(USBPort *port, USBPacket *packet) xhci_kick_ep(xfer->xhci, xfer->slotid, xfer->epid); } -static void xhci_child_detach(USBPort *port, USBDevice *child) +static void xhci_child_detach(USBPort *uport, USBDevice *child) { - FIXME(); + USBBus *bus = usb_bus_from_device(child); + XHCIState *xhci = container_of(bus, XHCIState, bus); + int i; + + for (i = 0; i < MAXSLOTS; i++) { + if (xhci->slots[i].uport == uport) { + xhci->slots[i].uport = NULL; + } + } } -static USBPortOps xhci_port_ops = { +static USBPortOps xhci_uport_ops = { .attach = xhci_attach, .detach = xhci_detach, .wakeup = xhci_wakeup, @@ -2906,6 +2920,7 @@ static void usb_xhci_init(XHCIState *xhci, DeviceState *dev) USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL | USB_SPEED_MASK_HIGH; + snprintf(port->name, sizeof(port->name), "usb2 port #%d", i+1); speedmask |= port->speedmask; } if (i < xhci->numports_3) { @@ -2913,16 +2928,17 @@ static void usb_xhci_init(XHCIState *xhci, DeviceState *dev) port->portnr = i + 1 + xhci->numports_2; port->uport = &xhci->uports[i]; port->speedmask = USB_SPEED_MASK_SUPER; + snprintf(port->name, sizeof(port->name), "usb3 port #%d", i+1); speedmask |= port->speedmask; } usb_register_port(&xhci->bus, &xhci->uports[i], xhci, i, - &xhci_port_ops, speedmask); + &xhci_uport_ops, speedmask); } } static int usb_xhci_initfn(struct PCIDevice *dev) { - int ret; + int i, ret; XHCIState *xhci = DO_UPCAST(XHCIState, pci_dev, dev); @@ -2941,7 +2957,7 @@ static int usb_xhci_initfn(struct PCIDevice *dev) memory_region_init_io(&xhci->mem_cap, &xhci_cap_ops, xhci, "capabilities", LEN_CAP); memory_region_init_io(&xhci->mem_oper, &xhci_oper_ops, xhci, - "operational", 0x400 + 0x10 * xhci->numports); + "operational", 0x400); memory_region_init_io(&xhci->mem_runtime, &xhci_runtime_ops, xhci, "runtime", LEN_RUNTIME); memory_region_init_io(&xhci->mem_doorbell, &xhci_doorbell_ops, xhci, @@ -2952,6 +2968,15 @@ static int usb_xhci_initfn(struct PCIDevice *dev) memory_region_add_subregion(&xhci->mem, OFF_RUNTIME, &xhci->mem_runtime); memory_region_add_subregion(&xhci->mem, OFF_DOORBELL, &xhci->mem_doorbell); + for (i = 0; i < xhci->numports; i++) { + XHCIPort *port = &xhci->ports[i]; + uint32_t offset = OFF_OPER + 0x400 + 0x10 * i; + port->xhci = xhci; + memory_region_init_io(&port->mem, &xhci_port_ops, port, + port->name, 0x10); + memory_region_add_subregion(&xhci->mem, offset, &port->mem); + } + pci_register_bar(&xhci->pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64, &xhci->mem); diff --git a/hw/usb/libhw.c b/hw/usb/libhw.c index c0de30ea88..703e2d213b 100644 --- a/hw/usb/libhw.c +++ b/hw/usb/libhw.c @@ -28,19 +28,25 @@ int usb_packet_map(USBPacket *p, QEMUSGList *sgl) { DMADirection dir = (p->pid == USB_TOKEN_IN) ? DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE; - dma_addr_t len; void *mem; int i; for (i = 0; i < sgl->nsg; i++) { - len = sgl->sg[i].len; - mem = dma_memory_map(sgl->dma, sgl->sg[i].base, &len, dir); - if (!mem) { - goto err; - } - qemu_iovec_add(&p->iov, mem, len); - if (len != sgl->sg[i].len) { - goto err; + dma_addr_t base = sgl->sg[i].base; + dma_addr_t len = sgl->sg[i].len; + + while (len) { + dma_addr_t xlen = len; + mem = dma_memory_map(sgl->dma, sgl->sg[i].base, &xlen, dir); + if (!mem) { + goto err; + } + if (xlen > len) { + xlen = len; + } + qemu_iovec_add(&p->iov, mem, xlen); + len -= xlen; + base += xlen; } } return 0; diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 6f6d172fd0..e25cc96477 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -64,31 +64,22 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status) } static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, - int is_read) + bool is_read) { - BlockErrorAction action = bdrv_get_on_error(req->dev->bs, is_read); + BlockErrorAction action = bdrv_get_error_action(req->dev->bs, is_read, error); VirtIOBlock *s = req->dev; - if (action == BLOCK_ERR_IGNORE) { - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read); - return 0; - } - - if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC) - || action == BLOCK_ERR_STOP_ANY) { + if (action == BDRV_ACTION_STOP) { req->next = s->rq; s->rq = req; - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read); - vm_stop(RUN_STATE_IO_ERROR); - bdrv_iostatus_set_err(s->bs, error); - } else { + } else if (action == BDRV_ACTION_REPORT) { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); bdrv_acct_done(s->bs, &req->acct); g_free(req); - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read); } - return 1; + bdrv_error_action(s->bs, action, is_read, error); + return action != BDRV_ACTION_IGNORE; } static void virtio_blk_rw_complete(void *opaque, int ret) @@ -98,7 +89,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) trace_virtio_blk_rw_complete(req, ret); if (ret) { - int is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT); + bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT); if (virtio_blk_handle_rw_error(req, -ret, is_read)) return; } diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 6490743290..247d7bef56 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -690,7 +690,7 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) { VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque; - virtqueue_push(n->tx_vq, &n->async_tx.elem, n->async_tx.len); + virtqueue_push(n->tx_vq, &n->async_tx.elem, 0); virtio_notify(&n->vdev, n->tx_vq); n->async_tx.elem.out_num = n->async_tx.len = 0; @@ -754,7 +754,7 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq) len += ret; - virtqueue_push(vq, &elem, len); + virtqueue_push(vq, &elem, 0); virtio_notify(&n->vdev, vq); if (++num_packets >= n->tx_burst) { diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c index 82073f5dc2..d20bd8bf75 100644 --- a/hw/virtio-serial-bus.c +++ b/hw/virtio-serial-bus.c @@ -287,6 +287,7 @@ ssize_t virtio_serial_write(VirtIOSerialPort *port, const uint8_t *buf, size_t virtio_serial_guest_ready(VirtIOSerialPort *port) { VirtQueue *vq = port->ivq; + unsigned int bytes; if (!virtio_queue_ready(vq) || !(port->vser->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) || @@ -296,14 +297,8 @@ size_t virtio_serial_guest_ready(VirtIOSerialPort *port) if (use_multiport(port->vser) && !port->guest_connected) { return 0; } - - if (virtqueue_avail_bytes(vq, 4096, 0)) { - return 4096; - } - if (virtqueue_avail_bytes(vq, 1, 0)) { - return 1; - } - return 0; + virtqueue_get_avail_bytes(vq, &bytes, NULL); + return bytes; } static void flush_queued_data_bh(void *opaque) diff --git a/hw/virtio.c b/hw/virtio.c index 209c763751..6821092df2 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -241,7 +241,7 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, elem->in_sg[i].iov_len, 1, size); - offset += elem->in_sg[i].iov_len; + offset += size; } for (i = 0; i < elem->out_num; i++) @@ -335,10 +335,11 @@ static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa, return next; } -int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) +void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + unsigned int *out_bytes) { unsigned int idx; - int total_bufs, in_total, out_total; + unsigned int total_bufs, in_total, out_total; idx = vq->last_avail_idx; @@ -380,13 +381,9 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) } if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) { - if (in_bytes > 0 && - (in_total += vring_desc_len(desc_pa, i)) >= in_bytes) - return 1; + in_total += vring_desc_len(desc_pa, i); } else { - if (out_bytes > 0 && - (out_total += vring_desc_len(desc_pa, i)) >= out_bytes) - return 1; + out_total += vring_desc_len(desc_pa, i); } } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max); @@ -395,7 +392,24 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) else total_bufs++; } + if (in_bytes) { + *in_bytes = in_total; + } + if (out_bytes) { + *out_bytes = out_total; + } +} +int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, + unsigned int out_bytes) +{ + unsigned int in_total, out_total; + + virtqueue_get_avail_bytes(vq, &in_total, &out_total); + if ((in_bytes && in_bytes < in_total) + || (out_bytes && out_bytes < out_total)) { + return 1; + } return 0; } diff --git a/hw/virtio.h b/hw/virtio.h index 7a4f564529..80de3757e3 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -147,7 +147,10 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr, size_t num_sg, int is_write); int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem); -int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes); +int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, + unsigned int out_bytes); +void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + unsigned int *out_bytes); void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); @@ -186,8 +186,7 @@ static const int key_defs[] = { int index_from_key(const char *key) { - int i, keycode; - char *endp; + int i; for (i = 0; QKeyCode_lookup[i] != NULL; i++) { if (!strcmp(key, QKeyCode_lookup[i])) { @@ -195,17 +194,6 @@ int index_from_key(const char *key) } } - if (strstart(key, "0x", NULL)) { - keycode = strtoul(key, &endp, 0); - if (*endp == '\0' && keycode >= 0x01 && keycode <= 0xff) { - for (i = 0; i < Q_KEY_CODE_MAX; i++) { - if (keycode == key_defs[i]) { - break; - } - } - } - } - /* Return Q_KEY_CODE_MAX if the key is invalid */ return i; } @@ -224,30 +212,46 @@ int index_from_keycode(int code) return i; } -static QKeyCodeList *keycodes; +static int *keycodes; +static int keycodes_size; static QEMUTimer *key_timer; +static int keycode_from_keyvalue(const KeyValue *value) +{ + if (value->kind == KEY_VALUE_KIND_QCODE) { + return key_defs[value->qcode]; + } else { + assert(value->kind == KEY_VALUE_KIND_NUMBER); + return value->number; + } +} + +static void free_keycodes(void) +{ + g_free(keycodes); + keycodes = NULL; + keycodes_size = 0; +} + static void release_keys(void *opaque) { - int keycode; - QKeyCodeList *p; + int i; - for (p = keycodes; p != NULL; p = p->next) { - keycode = key_defs[p->value]; - if (keycode & 0x80) { + for (i = 0; i < keycodes_size; i++) { + if (keycodes[i] & 0x80) { kbd_put_keycode(0xe0); } - kbd_put_keycode(keycode | 0x80); + kbd_put_keycode(keycodes[i]| 0x80); } - qapi_free_QKeyCodeList(keycodes); - keycodes = NULL; + + free_keycodes(); } -void qmp_send_key(QKeyCodeList *keys, bool has_hold_time, int64_t hold_time, +void qmp_send_key(KeyValueList *keys, bool has_hold_time, int64_t hold_time, Error **errp) { int keycode; - QKeyCodeList *p, *keylist, *head = NULL, *tmp = NULL; + KeyValueList *p; if (!key_timer) { key_timer = qemu_new_timer_ns(vm_clock, release_keys, NULL); @@ -257,31 +261,28 @@ void qmp_send_key(QKeyCodeList *keys, bool has_hold_time, int64_t hold_time, qemu_del_timer(key_timer); release_keys(NULL); } + if (!has_hold_time) { hold_time = 100; } for (p = keys; p != NULL; p = p->next) { - keylist = g_malloc0(sizeof(*keylist)); - keylist->value = p->value; - keylist->next = NULL; - - if (!head) { - head = keylist; - } - if (tmp) { - tmp->next = keylist; + /* key down events */ + keycode = keycode_from_keyvalue(p->value); + if (keycode < 0x01 || keycode > 0xff) { + error_setg(errp, "invalid hex keycode 0x%x\n", keycode); + free_keycodes(); + return; } - tmp = keylist; - /* key down events */ - keycode = key_defs[p->value]; if (keycode & 0x80) { kbd_put_keycode(0xe0); } kbd_put_keycode(keycode & 0x7f); + + keycodes = g_realloc(keycodes, sizeof(int) * (keycodes_size + 1)); + keycodes[keycodes_size++] = keycode; } - keycodes = head; /* delayed key up events */ qemu_mod_timer(key_timer, qemu_get_clock_ns(vm_clock) + @@ -26,7 +26,7 @@ # include <sys/socket.h> #endif -size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, +size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, size_t offset, const void *buf, size_t bytes) { size_t done; @@ -36,7 +36,7 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt); * such "large" value is -1 (sinice size_t is unsigned), * so specifying `-1' as `bytes' means 'up to the end of iovec'. */ -size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, +size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, size_t offset, const void *buf, size_t bytes); size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, size_t offset, void *buf, size_t bytes); diff --git a/migration-fd.c b/migration-fd.c index 50138edb34..73351678e0 100644 --- a/migration-fd.c +++ b/migration-fd.c @@ -75,7 +75,7 @@ static int fd_close(MigrationState *s) int fd_start_outgoing_migration(MigrationState *s, const char *fdname) { - s->fd = monitor_get_fd(cur_mon, fdname); + s->fd = monitor_get_fd(cur_mon, fdname, NULL); if (s->fd == -1) { DPRINTF("fd_migration: invalid file descriptor identifier\n"); goto err_after_get_fd; @@ -450,6 +450,7 @@ static const char *monitor_event_names[] = { [QEVENT_SPICE_DISCONNECTED] = "SPICE_DISCONNECTED", [QEVENT_BLOCK_JOB_COMPLETED] = "BLOCK_JOB_COMPLETED", [QEVENT_BLOCK_JOB_CANCELLED] = "BLOCK_JOB_CANCELLED", + [QEVENT_BLOCK_JOB_ERROR] = "BLOCK_JOB_ERROR", [QEVENT_DEVICE_TRAY_MOVED] = "DEVICE_TRAY_MOVED", [QEVENT_SUSPEND] = "SUSPEND", [QEVENT_SUSPEND_DISK] = "SUSPEND_DISK", @@ -944,45 +945,6 @@ static void do_trace_print_events(Monitor *mon) trace_print_events((FILE *)mon, &monitor_fprintf); } -static int add_graphics_client(Monitor *mon, const QDict *qdict, QObject **ret_data) -{ - const char *protocol = qdict_get_str(qdict, "protocol"); - const char *fdname = qdict_get_str(qdict, "fdname"); - CharDriverState *s; - - if (strcmp(protocol, "spice") == 0) { - int fd = monitor_get_fd(mon, fdname); - int skipauth = qdict_get_try_bool(qdict, "skipauth", 0); - int tls = qdict_get_try_bool(qdict, "tls", 0); - if (!using_spice) { - /* correct one? spice isn't a device ,,, */ - qerror_report(QERR_DEVICE_NOT_ACTIVE, "spice"); - return -1; - } - if (qemu_spice_display_add_client(fd, skipauth, tls) < 0) { - close(fd); - } - return 0; -#ifdef CONFIG_VNC - } else if (strcmp(protocol, "vnc") == 0) { - int fd = monitor_get_fd(mon, fdname); - int skipauth = qdict_get_try_bool(qdict, "skipauth", 0); - vnc_display_add_client(NULL, fd, skipauth); - return 0; -#endif - } else if ((s = qemu_chr_find(protocol)) != NULL) { - int fd = monitor_get_fd(mon, fdname); - if (qemu_chr_add_client(s, fd) < 0) { - qerror_report(QERR_ADD_CLIENT_FAILED); - return -1; - } - return 0; - } - - qerror_report(QERR_INVALID_PARAMETER, "protocol"); - return -1; -} - static int client_migrate_info(Monitor *mon, const QDict *qdict, MonitorCompletion cb, void *opaque) { @@ -2118,7 +2080,7 @@ static void do_loadvm(Monitor *mon, const QDict *qdict) } } -int monitor_get_fd(Monitor *mon, const char *fdname) +int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp) { mon_fd_t *monfd; @@ -2139,6 +2101,7 @@ int monitor_get_fd(Monitor *mon, const char *fdname) return fd; } + error_setg(errp, "File descriptor named '%s' has not been found", fdname); return -1; } @@ -2410,12 +2373,14 @@ int monitor_fdset_dup_fd_remove(int dup_fd) int monitor_handle_fd_param(Monitor *mon, const char *fdname) { int fd; + Error *local_err = NULL; if (!qemu_isdigit(fdname[0]) && mon) { - fd = monitor_get_fd(mon, fdname); + fd = monitor_get_fd(mon, fdname, &local_err); if (fd == -1) { - error_report("No file descriptor named %s found", fdname); + qerror_report_err(local_err); + error_free(local_err); return -1; } } else { @@ -3259,11 +3224,7 @@ static int64_t expr_unary(Monitor *mon) break; default: errno = 0; -#if TARGET_PHYS_ADDR_BITS > 32 n = strtoull(pch, &p, 0); -#else - n = strtoul(pch, &p, 0); -#endif if (errno == ERANGE) { expr_error(mon, "number too large"); } @@ -38,6 +38,7 @@ typedef enum MonitorEvent { QEVENT_SPICE_DISCONNECTED, QEVENT_BLOCK_JOB_COMPLETED, QEVENT_BLOCK_JOB_CANCELLED, + QEVENT_BLOCK_JOB_ERROR, QEVENT_DEVICE_TRAY_MOVED, QEVENT_SUSPEND, QEVENT_SUSPEND_DISK, @@ -66,7 +67,7 @@ int monitor_read_block_device_key(Monitor *mon, const char *device, BlockDriverCompletionFunc *completion_cb, void *opaque); -int monitor_get_fd(Monitor *mon, const char *fdname); +int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp); int monitor_handle_fd_param(Monitor *mon, const char *fdname); void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap) diff --git a/qapi-schema.json b/qapi-schema.json index 4a4a850f5f..f9dbdae699 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -33,6 +33,31 @@ 'MigrationExpected' ] } ## +# @add_client +# +# Allow client connections for VNC, Spice and socket based +# character devices to be passed in to QEMU via SCM_RIGHTS. +# +# @protocol: protocol name. Valid names are "vnc", "spice" or the +# name of a character device (eg. from -chardev id=XXXX) +# +# @fdname: file descriptor name previously passed via 'getfd' command +# +# @skipauth: #optional whether to skip authentication. Only applies +# to "vnc" and "spice" protocols +# +# @tls: #optional whether to perform TLS. Only applies to the "spice" +# protocol +# +# Returns: nothing on success. +# +# Since: 0.14.0 +## +{ 'command': 'add_client', + 'data': { 'protocol': 'str', 'fdname': 'str', '*skipauth': 'bool', + '*tls': 'bool' } } + +## # @NameInfo: # # Guest name information. @@ -1088,6 +1113,29 @@ { 'command': 'query-pci', 'returns': ['PciInfo'] } ## +# @BlockdevOnError: +# +# An enumeration of possible behaviors for errors on I/O operations. +# The exact meaning depends on whether the I/O was initiated by a guest +# or by a block job +# +# @report: for guest operations, report the error to the guest; +# for jobs, cancel the job +# +# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR +# or BLOCK_JOB_ERROR) +# +# @enospc: same as @stop on ENOSPC, same as @report otherwise. +# +# @stop: for guest operations, stop the virtual machine; +# for jobs, pause the job +# +# Since: 1.3 +## +{ 'enum': 'BlockdevOnError', + 'data': ['report', 'ignore', 'enospc', 'stop'] } + +## # @BlockJobInfo: # # Information about a long-running block device operation. @@ -1098,15 +1146,24 @@ # # @len: the maximum progress value # +# @busy: false if the job is known to be in a quiescent state, with +# no pending I/O. Since 1.3. +# +# @paused: whether the job is paused or, if @busy is true, will +# pause itself as soon as possible. Since 1.3. +# # @offset: the current progress value # # @speed: the rate limit, bytes per second # +# @io-status: the status of the job (since 1.3) +# # Since: 1.1 ## { 'type': 'BlockJobInfo', 'data': {'type': 'str', 'device': 'str', 'len': 'int', - 'offset': 'int', 'speed': 'int'} } + 'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int', + 'io-status': 'BlockDeviceIoStatus'} } ## # @query-block-jobs: @@ -1374,7 +1431,7 @@ # @format: #optional the format of the snapshot image, default is 'qcow2'. # # @mode: #optional whether and how QEMU should create a new image, default is -# 'absolute-paths'. +# 'absolute-paths'. ## { 'type': 'BlockdevSnapshot', 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str', @@ -1428,7 +1485,7 @@ # @format: #optional the format of the snapshot image, default is 'qcow2'. # # @mode: #optional whether and how QEMU should create a new image, default is -# 'absolute-paths'. +# 'absolute-paths'. # # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound @@ -1468,6 +1525,40 @@ 'returns': 'str' } ## +# @block-commit +# +# Live commit of data from overlay image nodes into backing nodes - i.e., +# writes data between 'top' and 'base' into 'base'. +# +# @device: the name of the device +# +# @base: #optional The file name of the backing image to write data into. +# If not specified, this is the deepest backing image +# +# @top: The file name of the backing image within the image chain, +# which contains the topmost data to be committed down. +# Note, the active layer as 'top' is currently unsupported. +# +# If top == base, that is an error. +# +# +# @speed: #optional the maximum speed, in bytes per second +# +# Returns: Nothing on success +# If commit or stream is already active on this device, DeviceInUse +# If @device does not exist, DeviceNotFound +# If image commit is not supported by this device, NotSupported +# If @base or @top is invalid, a generic error is returned +# If @top is the active layer, or omitted, a generic error is returned +# If @speed is invalid, InvalidParameter +# +# Since: 1.3 +# +## +{ 'command': 'block-commit', + 'data': { 'device': 'str', '*base': 'str', 'top': 'str', + '*speed': 'int' } } + # @migrate_cancel # # Cancel the current executing migration process. @@ -1803,13 +1894,18 @@ # # @speed: #optional the maximum speed, in bytes per second # +# @on-error: #optional the action to take on an error (default report). +# 'stop' and 'enospc' can only be used if the block device +# supports io-status (see BlockInfo). Since 1.3. +# # Returns: Nothing on success # If @device does not exist, DeviceNotFound # # Since: 1.1 ## -{ 'command': 'block-stream', 'data': { 'device': 'str', '*base': 'str', - '*speed': 'int' } } +{ 'command': 'block-stream', + 'data': { 'device': 'str', '*base': 'str', '*speed': 'int', + '*on-error': 'BlockdevOnError' } } ## # @block-job-set-speed: @@ -1853,12 +1949,58 @@ # # @device: the device name # +# @force: #optional whether to allow cancellation of a paused job (default +# false). Since 1.3. +# # Returns: Nothing on success # If no background operation is active on this device, DeviceNotActive # # Since: 1.1 ## -{ 'command': 'block-job-cancel', 'data': { 'device': 'str' } } +{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } } + +## +# @block-job-pause: +# +# Pause an active background block operation. +# +# This command returns immediately after marking the active background block +# operation for pausing. It is an error to call this command if no +# operation is in progress. Pausing an already paused job has no cumulative +# effect; a single block-job-resume command will resume the job. +# +# The operation will pause as soon as possible. No event is emitted when +# the operation is actually paused. Cancelling a paused job automatically +# resumes it. +# +# @device: the device name +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.3 +## +{ 'command': 'block-job-pause', 'data': { 'device': 'str' } } + +## +# @block-job-resume: +# +# Resume an active background block operation. +# +# This command returns immediately after resuming a paused background block +# operation. It is an error to call this command if no operation is in +# progress. Resuming an already running job is not an error. +# +# This command also clears the error status of the job. +# +# @device: the device name +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.3 +## +{ 'command': 'block-job-resume', 'data': { 'device': 'str' } } ## # @ObjectTypeInfo: @@ -1995,26 +2137,33 @@ # supported on i386 and x86_64. # # @paging: if true, do paging to get guest's memory mapping. This allows -# using gdb to process the core file. However, setting @paging to false -# may be desirable because of two reasons: +# using gdb to process the core file. +# +# IMPORTANT: this option can make QEMU allocate several gigabytes +# of RAM. This can happen for a large guest, or a +# malicious guest pretending to be large. # -# 1. The guest may be in a catastrophic state or can have corrupted -# memory, which cannot be trusted -# 2. The guest can be in real-mode even if paging is enabled. For example, -# the guest uses ACPI to sleep, and ACPI sleep state goes in real-mode +# Also, paging=true has the following limitations: +# +# 1. The guest may be in a catastrophic state or can have corrupted +# memory, which cannot be trusted +# 2. The guest can be in real-mode even if paging is enabled. For +# example, the guest uses ACPI to sleep, and ACPI sleep state +# goes in real-mode # # @protocol: the filename or file descriptor of the vmcore. The supported -# protocols are: +# protocols are: # -# 1. file: the protocol starts with "file:", and the following string is -# the file's path. -# 2. fd: the protocol starts with "fd:", and the following string is the -# fd's name. +# 1. file: the protocol starts with "file:", and the following +# string is the file's path. +# 2. fd: the protocol starts with "fd:", and the following string +# is the fd's name. # # @begin: #optional if specified, the starting physical address. # # @length: #optional if specified, the memory size, in bytes. If you don't -# want to dump all guest's memory, please specify the start @begin and @length +# want to dump all guest's memory, please specify the start @begin +# and @length # # Returns: nothing on success # @@ -2023,6 +2172,7 @@ { 'command': 'dump-guest-memory', 'data': { 'paging': 'bool', 'protocol': 'str', '*begin': 'int', '*length': 'int' } } + ## # @netdev_add: # @@ -2601,12 +2751,26 @@ 'lf', 'help', 'meta_l', 'meta_r', 'compose' ] } ## +# @KeyValue +# +# Represents a keyboard key. +# +# Since: 1.3.0 +## +{ 'union': 'KeyValue', + 'data': { + 'number': 'int', + 'qcode': 'QKeyCode' } } + +## # @send-key: # # Send keys to guest. # -# @keys: key sequence. 'keys' is the name of the key. Use a JSON array to -# press several keys simultaneously. +# @keys: An array of @KeyValue elements. All @KeyValues in this array are +# simultaneously sent to the guest. A @KeyValue.number value is sent +# directly to the guest, while @KeyValue.qcode must be a valid +# @QKeyCode value # # @hold-time: #optional time to delay key up events, milliseconds. Defaults # to 100 @@ -2618,7 +2782,7 @@ # ## { 'command': 'send-key', - 'data': { 'keys': ['QKeyCode'], '*hold-time': 'int' } } + 'data': { 'keys': ['KeyValue'], '*hold-time': 'int' } } ## # @screendump: diff --git a/qemu-tool.c b/qemu-tool.c index 18205babab..f2f98138ce 100644 --- a/qemu-tool.c +++ b/qemu-tool.c @@ -19,6 +19,7 @@ #include "qemu-log.h" #include "migration.h" #include "main-loop.h" +#include "sysemu.h" #include "qemu_socket.h" #include "slirp/libslirp.h" @@ -37,6 +38,11 @@ const char *qemu_get_vm_name(void) Monitor *cur_mon; +void vm_stop(RunState state) +{ + abort(); +} + int monitor_cur_is_qmp(void) { return 0; @@ -48,6 +48,12 @@ void assert_no_error(Error *err); #define QERR_BASE_NOT_FOUND \ ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found" +#define QERR_BLOCK_JOB_NOT_ACTIVE \ + ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'" + +#define QERR_BLOCK_JOB_PAUSED \ + ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused" + #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \ ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'" diff --git a/qmp-commands.hx b/qmp-commands.hx index 662b7cf32c..2f8477e2a8 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -811,11 +811,17 @@ EQMP { .name = "block-stream", - .args_type = "device:B,base:s?,speed:o?", + .args_type = "device:B,base:s?,speed:o?,on-error:s?", .mhandler.cmd_new = qmp_marshal_input_block_stream, }, { + .name = "block-commit", + .args_type = "device:B,base:s?,top:s,speed:o?", + .mhandler.cmd_new = qmp_marshal_input_block_commit, + }, + + { .name = "block-job-set-speed", .args_type = "device:B,speed:o", .mhandler.cmd_new = qmp_marshal_input_block_job_set_speed, @@ -823,10 +829,20 @@ EQMP { .name = "block-job-cancel", - .args_type = "device:B", + .args_type = "device:B,force:b?", .mhandler.cmd_new = qmp_marshal_input_block_job_cancel, }, { + .name = "block-job-pause", + .args_type = "device:B", + .mhandler.cmd_new = qmp_marshal_input_block_job_pause, + }, + { + .name = "block-job-resume", + .args_type = "device:B", + .mhandler.cmd_new = qmp_marshal_input_block_job_resume, + }, + { .name = "transaction", .args_type = "actions:q", .mhandler.cmd_new = qmp_marshal_input_transaction, @@ -1255,10 +1271,7 @@ EQMP { .name = "add_client", .args_type = "protocol:s,fdname:s,skipauth:b?,tls:b?", - .params = "protocol fdname skipauth tls", - .help = "add a graphics client", - .user_print = monitor_user_noop, - .mhandler.cmd_new = add_graphics_client, + .mhandler.cmd_new = qmp_marshal_input_add_client, }, SQMP @@ -479,3 +479,46 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) return arch_query_cpu_definitions(errp); } +void qmp_add_client(const char *protocol, const char *fdname, + bool has_skipauth, bool skipauth, bool has_tls, bool tls, + Error **errp) +{ + CharDriverState *s; + int fd; + + fd = monitor_get_fd(cur_mon, fdname, errp); + if (fd < 0) { + return; + } + + if (strcmp(protocol, "spice") == 0) { + if (!using_spice) { + error_set(errp, QERR_DEVICE_NOT_ACTIVE, "spice"); + close(fd); + return; + } + skipauth = has_skipauth ? skipauth : false; + tls = has_tls ? tls : false; + if (qemu_spice_display_add_client(fd, skipauth, tls) < 0) { + error_setg(errp, "spice failed to add client"); + close(fd); + } + return; +#ifdef CONFIG_VNC + } else if (strcmp(protocol, "vnc") == 0) { + skipauth = has_skipauth ? skipauth : false; + vnc_display_add_client(NULL, fd, skipauth); + return; +#endif + } else if ((s = qemu_chr_find(protocol)) != NULL) { + if (qemu_chr_add_client(s, fd) < 0) { + error_setg(errp, "failed to add client"); + close(fd); + return; + } + return; + } + + error_setg(errp, "protocol '%s' is invalid", protocol); + close(fd); +} diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py index 49ef569a2f..1b84834959 100644 --- a/scripts/qapi-types.py +++ b/scripts/qapi-types.py @@ -91,9 +91,9 @@ const char *%(name)s_lookup[] = { def generate_enum_name(name): if name.isupper(): - return c_fun(name) + return c_fun(name, False) new_name = '' - for c in c_fun(name): + for c in c_fun(name, False): if c.isupper(): new_name += '_' new_name += c diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py index e2093e8947..a360de719f 100644 --- a/scripts/qapi-visit.py +++ b/scripts/qapi-visit.py @@ -173,7 +173,7 @@ void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error ** break; ''', abbrev = de_camel_case(name).upper(), - enum = c_fun(de_camel_case(key)).upper(), + enum = c_fun(de_camel_case(key),False).upper(), c_type=members[key], c_name=c_fun(key)) diff --git a/scripts/qapi.py b/scripts/qapi.py index 122b4cb6d1..afc5f32aeb 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -141,7 +141,7 @@ def camel_case(name): new_name += ch.lower() return new_name -def c_var(name): +def c_var(name, protect=True): # ANSI X3J11/88-090, 3.1.1 c89_words = set(['auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', 'extern', 'float', @@ -156,12 +156,14 @@ def c_var(name): # GCC http://gcc.gnu.org/onlinedocs/gcc-4.7.1/gcc/C-Extensions.html # excluding _.* gcc_words = set(['asm', 'typeof']) - if name in c89_words | c99_words | c11_words | gcc_words: + # namespace pollution: + polluted_words = set(['unix']) + if protect and (name in c89_words | c99_words | c11_words | gcc_words | polluted_words): return "q_" + name return name.replace('-', '_').lstrip("*") -def c_fun(name): - return c_var(name).replace('.', '_') +def c_fun(name, protect=True): + return c_var(name, protect).replace('.', '_') def c_list_type(name): return '%sList' % name diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c index d2664acef0..532b114aed 100644 --- a/target-ppc/mmu_helper.c +++ b/target-ppc/mmu_helper.c @@ -1032,12 +1032,10 @@ static int ppcemb_tlb_check(CPUPPCState *env, ppcemb_tlb_t *tlb, return -1; } *raddrp = (tlb->RPN & mask) | (address & ~mask); -#if (TARGET_PHYS_ADDR_BITS >= 36) if (ext) { /* Extend the physical address to 36 bits */ - *raddrp |= (target_phys_addr_t)(tlb->RPN & 0xF) << 32; + *raddrp |= (uint64_t)(tlb->RPN & 0xF) << 32; } -#endif return 0; } diff --git a/targphys.h b/targphys.h index bd4938fc02..08cade9096 100644 --- a/targphys.h +++ b/targphys.h @@ -3,25 +3,10 @@ #ifndef TARGPHYS_H #define TARGPHYS_H -#ifdef TARGET_PHYS_ADDR_BITS +#define TARGET_PHYS_ADDR_BITS 64 /* target_phys_addr_t is the type of a physical address (its size can be different from 'target_ulong'). */ -#if TARGET_PHYS_ADDR_BITS == 32 -typedef uint32_t target_phys_addr_t; -#define TARGET_PHYS_ADDR_MAX UINT32_MAX -#define TARGET_FMT_plx "%08x" -/* Format strings for printing target_phys_addr_t types. - * These are recommended over the less flexible TARGET_FMT_plx, - * which is retained for the benefit of existing code. - */ -#define TARGET_PRIdPHYS PRId32 -#define TARGET_PRIiPHYS PRIi32 -#define TARGET_PRIoPHYS PRIo32 -#define TARGET_PRIuPHYS PRIu32 -#define TARGET_PRIxPHYS PRIx32 -#define TARGET_PRIXPHYS PRIX32 -#elif TARGET_PHYS_ADDR_BITS == 64 typedef uint64_t target_phys_addr_t; #define TARGET_PHYS_ADDR_MAX UINT64_MAX #define TARGET_FMT_plx "%016" PRIx64 @@ -31,7 +16,5 @@ typedef uint64_t target_phys_addr_t; #define TARGET_PRIuPHYS PRIu64 #define TARGET_PRIxPHYS PRIx64 #define TARGET_PRIXPHYS PRIX64 -#endif -#endif #endif diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 55b16f81dd..dd4ef11996 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -18,6 +18,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # +import time import os import iotests from iotests import qemu_img, qemu_io @@ -98,6 +99,43 @@ class TestSingleDrive(ImageStreamingTestCase): qemu_io('-c', 'map', test_img), 'image file map does not match backing file after streaming') + def test_stream_pause(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-pause', device='drive0') + self.assert_qmp(result, 'return', {}) + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + offset = self.dictpath(result, 'return[0]/offset') + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/offset', offset) + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + self.assertEqual(qemu_io('-c', 'map', backing_img), + qemu_io('-c', 'map', test_img), + 'image file map does not match backing file after streaming') + def test_stream_partial(self): self.assert_no_active_streams() @@ -157,6 +195,226 @@ class TestSmallerBackingFile(ImageStreamingTestCase): self.assert_no_active_streams() self.vm.shutdown() +class TestErrors(ImageStreamingTestCase): + image_len = 2 * 1024 * 1024 # MB + + # this should match STREAM_BUFFER_SIZE/512 in block/stream.c + STREAM_BUFFER_SIZE = 512 * 1024 + + def create_blkdebug_file(self, name, event, errno): + file = open(name, 'w') + file.write(''' +[inject-error] +state = "1" +event = "%s" +errno = "%d" +immediately = "off" +once = "on" +sector = "%d" + +[set-state] +state = "1" +event = "%s" +new_state = "2" + +[set-state] +state = "2" +event = "%s" +new_state = "1" +''' % (event, errno, self.STREAM_BUFFER_SIZE / 512, event, event)) + file.close() + +class TestEIO(TestErrors): + def setUp(self): + self.blkdebug_file = backing_img + ".blkdebug" + self.create_image(backing_img, TestErrors.image_len) + self.create_blkdebug_file(self.blkdebug_file, "read_aio", 5) + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw' + % (self.blkdebug_file, backing_img), + test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(self.blkdebug_file) + + def test_report(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0') + self.assert_qmp(result, 'return', {}) + + completed = False + error = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + def test_ignore(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='ignore') + self.assert_qmp(result, 'return', {}) + + error = False + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + def test_stop(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='stop') + self.assert_qmp(result, 'return', {}) + + error = False + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', True) + self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(result, 'return[0]/io-status', 'failed') + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.assert_qmp(result, 'return[0]/io-status', 'ok') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp_absent(event, 'data/error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + def test_enospc(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='enospc') + self.assert_qmp(result, 'return', {}) + + completed = False + error = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + +class TestENOSPC(TestErrors): + def setUp(self): + self.blkdebug_file = backing_img + ".blkdebug" + self.create_image(backing_img, TestErrors.image_len) + self.create_blkdebug_file(self.blkdebug_file, "read_aio", 28) + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw' + % (self.blkdebug_file, backing_img), + test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(self.blkdebug_file) + + def test_enospc(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='enospc') + self.assert_qmp(result, 'return', {}) + + error = False + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', True) + self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(result, 'return[0]/io-status', 'nospace') + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.assert_qmp(result, 'return[0]/io-status', 'ok') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp_absent(event, 'data/error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() class TestStreamStop(ImageStreamingTestCase): image_len = 8 * 1024 * 1024 * 1024 # GB @@ -173,8 +431,6 @@ class TestStreamStop(ImageStreamingTestCase): os.remove(backing_img) def test_stream_stop(self): - import time - self.assert_no_active_streams() result = self.vm.qmp('block-stream', device='drive0') diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index 2f7d3902f2..fa16b5ccef 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -....... +............. ---------------------------------------------------------------------- -Ran 7 tests +Ran 13 tests OK diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 new file mode 100755 index 0000000000..258e7eae38 --- /dev/null +++ b/tests/qemu-iotests/040 @@ -0,0 +1,178 @@ +#!/usr/bin/env python +# +# Tests for image block commit. +# +# Copyright (C) 2012 IBM, Corp. +# Copyright (C) 2012 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# Test for live block commit +# Derived from Image Streaming Test 030 + +import time +import os +import iotests +from iotests import qemu_img, qemu_io +import struct + +backing_img = os.path.join(iotests.test_dir, 'backing.img') +mid_img = os.path.join(iotests.test_dir, 'mid.img') +test_img = os.path.join(iotests.test_dir, 'test.img') + +class ImageCommitTestCase(iotests.QMPTestCase): + '''Abstract base class for image commit test cases''' + + def assert_no_active_commit(self): + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return', []) + + def cancel_and_wait(self, drive='drive0'): + '''Cancel a block job and wait for it to finish''' + result = self.vm.qmp('block-job-cancel', device=drive) + self.assert_qmp(result, 'return', {}) + + cancelled = False + while not cancelled: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_CANCELLED': + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp(event, 'data/device', drive) + cancelled = True + + self.assert_no_active_commit() + + def create_image(self, name, size): + file = open(name, 'w') + i = 0 + while i < size: + sector = struct.pack('>l504xl', i / 512, i / 512) + file.write(sector) + i = i + 512 + file.close() + + +class TestSingleDrive(ImageCommitTestCase): + image_len = 1 * 1024 * 1024 + test_len = 1 * 1024 * 256 + + def setUp(self): + self.create_image(backing_img, TestSingleDrive.image_len) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) + qemu_io('-c', 'write -P 0xab 0 524288', backing_img) + qemu_io('-c', 'write -P 0xef 524288 524288', mid_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(mid_img) + os.remove(backing_img) + + def test_commit(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img) + self.assert_qmp(result, 'return', {}) + + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_commit() + self.vm.shutdown() + + self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) + self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) + + def test_device_not_found(self): + result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % mid_img) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + def test_top_same_base(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Invalid files for merge: top and base are the same') + + def test_top_invalid(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='badfile', base='%s' % backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Top image file badfile not found') + + def test_base_invalid(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img, base='badfile') + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found') + + def test_top_is_active(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % test_img, base='%s' % backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported') + + def test_top_and_base_reversed(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % mid_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base (%(1)s) is not reachable from top (%(2)s)' % {"1" : mid_img, "2" : backing_img}) + + def test_top_omitted(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0') + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', "Parameter 'top' is missing") + + +class TestSetSpeed(ImageCommitTestCase): + image_len = 80 * 1024 * 1024 # MB + + def setUp(self): + qemu_img('create', backing_img, str(TestSetSpeed.image_len)) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(mid_img) + os.remove(backing_img) + + def test_set_speed(self): + self.assert_no_active_commit() + + result = self.vm.qmp('block-commit', device='drive0', top=mid_img, speed=1024 * 1024) + self.assert_qmp(result, 'return', {}) + + # Ensure the speed we set was accepted + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 1024 * 1024) + + self.cancel_and_wait() + + +if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2', 'qed']) diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out new file mode 100644 index 0000000000..dae404e278 --- /dev/null +++ b/tests/qemu-iotests/040.out @@ -0,0 +1,5 @@ +......... +---------------------------------------------------------------------- +Ran 9 tests + +OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index ebb5ca4b41..66d2ba9689 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -36,7 +36,7 @@ 027 rw auto quick 028 rw backing auto 029 rw auto quick -030 rw auto +030 rw auto backing 031 rw auto quick 032 rw auto 033 rw auto @@ -46,3 +46,4 @@ 037 rw auto backing 038 rw auto backing 039 rw auto +040 rw auto diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index e05b1d640b..3c60b2d163 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -19,6 +19,7 @@ import os import re import subprocess +import string import unittest import sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'QMP')) import qmp @@ -96,9 +97,14 @@ class VM(object): os.remove(self._qemu_log_path) self._popen = None + underscore_to_dash = string.maketrans('_', '-') def qmp(self, cmd, **args): '''Invoke a QMP command and return the result dict''' - return self._qmp.cmd(cmd, args=args) + qmp_args = dict() + for k in args.keys(): + qmp_args[k.translate(self.underscore_to_dash)] = args[k] + + return self._qmp.cmd(cmd, args=qmp_args) def get_qmp_events(self, wait=False): '''Poll for queued QMP events and return a list of dicts''' @@ -132,6 +138,13 @@ class QMPTestCase(unittest.TestCase): self.fail('invalid index "%s" in path "%s" in "%s"' % (idx, path, str(d))) return d + def assert_qmp_absent(self, d, path): + try: + result = self.dictpath(d, path) + except AssertionError: + return + self.fail('path "%s" has value "%s"' % (path, str(result))) + def assert_qmp(self, d, path, value): '''Assert that the value for a specific path in a QMP dict matches''' result = self.dictpath(d, path) diff --git a/trace-events b/trace-events index f5b5097552..42b66f19f4 100644 --- a/trace-events +++ b/trace-events @@ -74,10 +74,14 @@ bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t c # block/stream.c stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p" +commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" +commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p" # blockdev.c qmp_block_job_cancel(void *job) "job %p" -block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" +qmp_block_job_pause(void *job) "job %p" +qmp_block_job_resume(void *job) "job %p" +block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" qmp_block_stream(void *bs, void *job) "bs %p job %p" # hw/virtio-blk.c @@ -0,0 +1,2249 @@ +/** + * uri.c: set of generic URI related routines + * + * Reference: RFCs 3986, 2732 and 2373 + * + * Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of Daniel Veillard shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization from him. + * + * daniel@veillard.com + * + ** + * + * Copyright (C) 2007, 2009-2010 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: + * Richard W.M. Jones <rjones@redhat.com> + * + */ + +#include <glib.h> +#include <string.h> +#include <stdio.h> + +#include "uri.h" + +static void uri_clean(URI *uri); + +/* + * Old rule from 2396 used in legacy handling code + * alpha = lowalpha | upalpha + */ +#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) + + +/* + * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | + * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | + * "u" | "v" | "w" | "x" | "y" | "z" + */ + +#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) + +/* + * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | + * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | + * "U" | "V" | "W" | "X" | "Y" | "Z" + */ +#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) + +#ifdef IS_DIGIT +#undef IS_DIGIT +#endif +/* + * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + */ +#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) + +/* + * alphanum = alpha | digit + */ + +#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) + +/* + * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" + */ + +#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ + ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ + ((x) == '(') || ((x) == ')')) + +/* + * unwise = "{" | "}" | "|" | "\" | "^" | "`" + */ + +#define IS_UNWISE(p) \ + (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ + ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ + ((*(p) == ']')) || ((*(p) == '`'))) +/* + * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," | + * "[" | "]" + */ + +#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ + ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ + ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ + ((x) == ']')) + +/* + * unreserved = alphanum | mark + */ + +#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) + +/* + * Skip to next pointer char, handle escaped sequences + */ + +#define NEXT(p) ((*p == '%')? p += 3 : p++) + +/* + * Productions from the spec. + * + * authority = server | reg_name + * reg_name = 1*( unreserved | escaped | "$" | "," | + * ";" | ":" | "@" | "&" | "=" | "+" ) + * + * path = [ abs_path | opaque_part ] + */ + + +/************************************************************************ + * * + * RFC 3986 parser * + * * + ************************************************************************/ + +#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9')) +#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \ + ((*(p) >= 'A') && (*(p) <= 'Z'))) +#define ISA_HEXDIG(p) \ + (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \ + ((*(p) >= 'A') && (*(p) <= 'F'))) + +/* + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + * / "*" / "+" / "," / ";" / "=" + */ +#define ISA_SUB_DELIM(p) \ + (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \ + ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \ + ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \ + ((*(p) == '=')) || ((*(p) == '\''))) + +/* + * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + */ +#define ISA_GEN_DELIM(p) \ + (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \ + ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \ + ((*(p) == '@'))) + +/* + * reserved = gen-delims / sub-delims + */ +#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p))) + +/* + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + */ +#define ISA_UNRESERVED(p) \ + ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ + ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) + +/* + * pct-encoded = "%" HEXDIG HEXDIG + */ +#define ISA_PCT_ENCODED(p) \ + ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) + +/* + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + */ +#define ISA_PCHAR(p) \ + (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ + ((*(p) == ':')) || ((*(p) == '@'))) + +/** + * rfc3986_parse_scheme: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI scheme + * + * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_scheme(URI *uri, const char **str) { + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + if (!ISA_ALPHA(cur)) + return(2); + cur++; + while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || + (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; + if (uri != NULL) { + if (uri->scheme != NULL) g_free(uri->scheme); + uri->scheme = g_strndup(*str, cur - *str); + } + *str = cur; + return(0); +} + +/** + * rfc3986_parse_fragment: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the query part of an URI + * + * fragment = *( pchar / "/" / "?" ) + * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' + * in the fragment identifier but this is used very broadly for + * xpointer scheme selection, so we are allowing it here to not break + * for example all the DocBook processing chains. + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_fragment(URI *uri, const char **str) +{ + const char *cur; + + if (str == NULL) + return (-1); + + cur = *str; + + while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || + (*cur == '[') || (*cur == ']') || + ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) + NEXT(cur); + if (uri != NULL) { + if (uri->fragment != NULL) + g_free(uri->fragment); + if (uri->cleanup & 2) + uri->fragment = g_strndup(*str, cur - *str); + else + uri->fragment = uri_string_unescape(*str, cur - *str, NULL); + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_query: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the query part of an URI + * + * query = *uric + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_query(URI *uri, const char **str) +{ + const char *cur; + + if (str == NULL) + return (-1); + + cur = *str; + + while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || + ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) + NEXT(cur); + if (uri != NULL) { + if (uri->query != NULL) + g_free (uri->query); + uri->query = g_strndup (*str, cur - *str); + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_port: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse a port part and fills in the appropriate fields + * of the @uri structure + * + * port = *DIGIT + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_port(URI *uri, const char **str) +{ + const char *cur = *str; + + if (ISA_DIGIT(cur)) { + if (uri != NULL) + uri->port = 0; + while (ISA_DIGIT(cur)) { + if (uri != NULL) + uri->port = uri->port * 10 + (*cur - '0'); + cur++; + } + *str = cur; + return(0); + } + return(1); +} + +/** + * rfc3986_parse_user_info: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an user informations part and fills in the appropriate fields + * of the @uri structure + * + * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_user_info(URI *uri, const char **str) +{ + const char *cur; + + cur = *str; + while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || + ISA_SUB_DELIM(cur) || (*cur == ':')) + NEXT(cur); + if (*cur == '@') { + if (uri != NULL) { + if (uri->user != NULL) g_free(uri->user); + if (uri->cleanup & 2) + uri->user = g_strndup(*str, cur - *str); + else + uri->user = uri_string_unescape(*str, cur - *str, NULL); + } + *str = cur; + return(0); + } + return(1); +} + +/** + * rfc3986_parse_dec_octet: + * @str: the string to analyze + * + * dec-octet = DIGIT ; 0-9 + * / %x31-39 DIGIT ; 10-99 + * / "1" 2DIGIT ; 100-199 + * / "2" %x30-34 DIGIT ; 200-249 + * / "25" %x30-35 ; 250-255 + * + * Skip a dec-octet. + * + * Returns 0 if found and skipped, 1 otherwise + */ +static int +rfc3986_parse_dec_octet(const char **str) { + const char *cur = *str; + + if (!(ISA_DIGIT(cur))) + return(1); + if (!ISA_DIGIT(cur+1)) + cur++; + else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2))) + cur += 2; + else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2))) + cur += 3; + else if ((*cur == '2') && (*(cur + 1) >= '0') && + (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2))) + cur += 3; + else if ((*cur == '2') && (*(cur + 1) == '5') && + (*(cur + 2) >= '0') && (*(cur + 1) <= '5')) + cur += 3; + else + return(1); + *str = cur; + return(0); +} +/** + * rfc3986_parse_host: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an host part and fills in the appropriate fields + * of the @uri structure + * + * host = IP-literal / IPv4address / reg-name + * IP-literal = "[" ( IPv6address / IPvFuture ) "]" + * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet + * reg-name = *( unreserved / pct-encoded / sub-delims ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_host(URI *uri, const char **str) +{ + const char *cur = *str; + const char *host; + + host = cur; + /* + * IPv6 and future adressing scheme are enclosed between brackets + */ + if (*cur == '[') { + cur++; + while ((*cur != ']') && (*cur != 0)) + cur++; + if (*cur != ']') + return(1); + cur++; + goto found; + } + /* + * try to parse an IPv4 + */ + if (ISA_DIGIT(cur)) { + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + if (*cur != '.') + goto not_ipv4; + cur++; + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + if (*cur != '.') + goto not_ipv4; + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + if (*cur != '.') + goto not_ipv4; + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + goto found; +not_ipv4: + cur = *str; + } + /* + * then this should be a hostname which can be empty + */ + while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) + NEXT(cur); +found: + if (uri != NULL) { + if (uri->authority != NULL) g_free(uri->authority); + uri->authority = NULL; + if (uri->server != NULL) g_free(uri->server); + if (cur != host) { + if (uri->cleanup & 2) + uri->server = g_strndup(host, cur - host); + else + uri->server = uri_string_unescape(host, cur - host, NULL); + } else + uri->server = NULL; + } + *str = cur; + return(0); +} + +/** + * rfc3986_parse_authority: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an authority part and fills in the appropriate fields + * of the @uri structure + * + * authority = [ userinfo "@" ] host [ ":" port ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_authority(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + /* + * try to parse an userinfo and check for the trailing @ + */ + ret = rfc3986_parse_user_info(uri, &cur); + if ((ret != 0) || (*cur != '@')) + cur = *str; + else + cur++; + ret = rfc3986_parse_host(uri, &cur); + if (ret != 0) return(ret); + if (*cur == ':') { + cur++; + ret = rfc3986_parse_port(uri, &cur); + if (ret != 0) return(ret); + } + *str = cur; + return(0); +} + +/** + * rfc3986_parse_segment: + * @str: the string to analyze + * @forbid: an optional forbidden character + * @empty: allow an empty segment + * + * Parse a segment and fills in the appropriate fields + * of the @uri structure + * + * segment = *pchar + * segment-nz = 1*pchar + * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + * ; non-zero-length segment without any colon ":" + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_segment(const char **str, char forbid, int empty) +{ + const char *cur; + + cur = *str; + if (!ISA_PCHAR(cur)) { + if (empty) + return(0); + return(1); + } + while (ISA_PCHAR(cur) && (*cur != forbid)) + NEXT(cur); + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_ab_empty: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path absolute or empty and fills in the appropriate fields + * of the @uri structure + * + * path-abempty = *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_ab_empty(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (*str != cur) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_absolute: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path absolute and fills in the appropriate fields + * of the @uri structure + * + * path-absolute = "/" [ segment-nz *( "/" segment ) ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_absolute(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + if (*cur != '/') + return(1); + cur++; + ret = rfc3986_parse_segment(&cur, 0, 0); + if (ret == 0) { + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_rootless: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path without root and fills in the appropriate fields + * of the @uri structure + * + * path-rootless = segment-nz *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_rootless(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + ret = rfc3986_parse_segment(&cur, 0, 0); + if (ret != 0) return(ret); + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_no_scheme: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path which is not a scheme and fills in the appropriate fields + * of the @uri structure + * + * path-noscheme = segment-nz-nc *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_no_scheme(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + ret = rfc3986_parse_segment(&cur, ':', 0); + if (ret != 0) return(ret); + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_hier_part: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an hierarchical part and fills in the appropriate fields + * of the @uri structure + * + * hier-part = "//" authority path-abempty + * / path-absolute + * / path-rootless + * / path-empty + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_hier_part(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + if ((*cur == '/') && (*(cur + 1) == '/')) { + cur += 2; + ret = rfc3986_parse_authority(uri, &cur); + if (ret != 0) return(ret); + ret = rfc3986_parse_path_ab_empty(uri, &cur); + if (ret != 0) return(ret); + *str = cur; + return(0); + } else if (*cur == '/') { + ret = rfc3986_parse_path_absolute(uri, &cur); + if (ret != 0) return(ret); + } else if (ISA_PCHAR(cur)) { + ret = rfc3986_parse_path_rootless(uri, &cur); + if (ret != 0) return(ret); + } else { + /* path-empty is effectively empty */ + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_relative_ref: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI string and fills in the appropriate fields + * of the @uri structure + * + * relative-ref = relative-part [ "?" query ] [ "#" fragment ] + * relative-part = "//" authority path-abempty + * / path-absolute + * / path-noscheme + * / path-empty + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_relative_ref(URI *uri, const char *str) { + int ret; + + if ((*str == '/') && (*(str + 1) == '/')) { + str += 2; + ret = rfc3986_parse_authority(uri, &str); + if (ret != 0) return(ret); + ret = rfc3986_parse_path_ab_empty(uri, &str); + if (ret != 0) return(ret); + } else if (*str == '/') { + ret = rfc3986_parse_path_absolute(uri, &str); + if (ret != 0) return(ret); + } else if (ISA_PCHAR(str)) { + ret = rfc3986_parse_path_no_scheme(uri, &str); + if (ret != 0) return(ret); + } else { + /* path-empty is effectively empty */ + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + uri->path = NULL; + } + } + + if (*str == '?') { + str++; + ret = rfc3986_parse_query(uri, &str); + if (ret != 0) return(ret); + } + if (*str == '#') { + str++; + ret = rfc3986_parse_fragment(uri, &str); + if (ret != 0) return(ret); + } + if (*str != 0) { + uri_clean(uri); + return(1); + } + return(0); +} + + +/** + * rfc3986_parse: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI string and fills in the appropriate fields + * of the @uri structure + * + * scheme ":" hier-part [ "?" query ] [ "#" fragment ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse(URI *uri, const char *str) { + int ret; + + ret = rfc3986_parse_scheme(uri, &str); + if (ret != 0) return(ret); + if (*str != ':') { + return(1); + } + str++; + ret = rfc3986_parse_hier_part(uri, &str); + if (ret != 0) return(ret); + if (*str == '?') { + str++; + ret = rfc3986_parse_query(uri, &str); + if (ret != 0) return(ret); + } + if (*str == '#') { + str++; + ret = rfc3986_parse_fragment(uri, &str); + if (ret != 0) return(ret); + } + if (*str != 0) { + uri_clean(uri); + return(1); + } + return(0); +} + +/** + * rfc3986_parse_uri_reference: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI reference string and fills in the appropriate fields + * of the @uri structure + * + * URI-reference = URI / relative-ref + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_uri_reference(URI *uri, const char *str) { + int ret; + + if (str == NULL) + return(-1); + uri_clean(uri); + + /* + * Try first to parse absolute refs, then fallback to relative if + * it fails. + */ + ret = rfc3986_parse(uri, str); + if (ret != 0) { + uri_clean(uri); + ret = rfc3986_parse_relative_ref(uri, str); + if (ret != 0) { + uri_clean(uri); + return(ret); + } + } + return(0); +} + +/** + * uri_parse: + * @str: the URI string to analyze + * + * Parse an URI based on RFC 3986 + * + * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + * + * Returns a newly built URI or NULL in case of error + */ +URI * +uri_parse(const char *str) { + URI *uri; + int ret; + + if (str == NULL) + return(NULL); + uri = uri_new(); + if (uri != NULL) { + ret = rfc3986_parse_uri_reference(uri, str); + if (ret) { + uri_free(uri); + return(NULL); + } + } + return(uri); +} + +/** + * uri_parse_into: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI reference string based on RFC 3986 and fills in the + * appropriate fields of the @uri structure + * + * URI-reference = URI / relative-ref + * + * Returns 0 or the error code + */ +int +uri_parse_into(URI *uri, const char *str) { + return(rfc3986_parse_uri_reference(uri, str)); +} + +/** + * uri_parse_raw: + * @str: the URI string to analyze + * @raw: if 1 unescaping of URI pieces are disabled + * + * Parse an URI but allows to keep intact the original fragments. + * + * URI-reference = URI / relative-ref + * + * Returns a newly built URI or NULL in case of error + */ +URI * +uri_parse_raw(const char *str, int raw) { + URI *uri; + int ret; + + if (str == NULL) + return(NULL); + uri = uri_new(); + if (uri != NULL) { + if (raw) { + uri->cleanup |= 2; + } + ret = uri_parse_into(uri, str); + if (ret) { + uri_free(uri); + return(NULL); + } + } + return(uri); +} + +/************************************************************************ + * * + * Generic URI structure functions * + * * + ************************************************************************/ + +/** + * uri_new: + * + * Simply creates an empty URI + * + * Returns the new structure or NULL in case of error + */ +URI * +uri_new(void) { + URI *ret; + + ret = (URI *) g_malloc(sizeof(URI)); + memset(ret, 0, sizeof(URI)); + return(ret); +} + +/** + * realloc2n: + * + * Function to handle properly a reallocation when saving an URI + * Also imposes some limit on the length of an URI string output + */ +static char * +realloc2n(char *ret, int *max) { + char *temp; + int tmp; + + tmp = *max * 2; + temp = g_realloc(ret, (tmp + 1)); + *max = tmp; + return(temp); +} + +/** + * uri_to_string: + * @uri: pointer to an URI + * + * Save the URI as an escaped string + * + * Returns a new string (to be deallocated by caller) + */ +char * +uri_to_string(URI *uri) { + char *ret = NULL; + char *temp; + const char *p; + int len; + int max; + + if (uri == NULL) return(NULL); + + + max = 80; + ret = g_malloc(max + 1); + len = 0; + + if (uri->scheme != NULL) { + p = uri->scheme; + while (*p != 0) { + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + } + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = ':'; + } + if (uri->opaque != NULL) { + p = uri->opaque; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } else { + if (uri->server != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '/'; + ret[len++] = '/'; + if (uri->user != NULL) { + p = uri->user; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == ';')) || ((*(p) == ':')) || + ((*(p) == '&')) || ((*(p) == '=')) || + ((*(p) == '+')) || ((*(p) == '$')) || + ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '@'; + } + p = uri->server; + while (*p != 0) { + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + } + if (uri->port > 0) { + if (len + 10 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + len += snprintf(&ret[len], max - len, ":%d", uri->port); + } + } else if (uri->authority != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '/'; + ret[len++] = '/'; + p = uri->authority; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || + ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || + ((*(p) == '=')) || ((*(p) == '+'))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } else if (uri->scheme != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '/'; + ret[len++] = '/'; + } + if (uri->path != NULL) { + p = uri->path; + /* + * the colon in file:///d: should not be escaped or + * Windows accesses fail later. + */ + if ((uri->scheme != NULL) && + (p[0] == '/') && + (((p[1] >= 'a') && (p[1] <= 'z')) || + ((p[1] >= 'A') && (p[1] <= 'Z'))) && + (p[2] == ':') && + (!strcmp(uri->scheme, "file"))) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + ret[len++] = *p++; + ret[len++] = *p++; + } + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || + ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || + ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || + ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } + if (uri->query != NULL) { + if (len + 1 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '?'; + p = uri->query; + while (*p != 0) { + if (len + 1 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + } + } + } + if (uri->fragment != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '#'; + p = uri->fragment; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len] = 0; + return(ret); + +mem_error: + g_free(ret); + return(NULL); +} + +/** + * uri_clean: + * @uri: pointer to an URI + * + * Make sure the URI struct is free of content + */ +static void +uri_clean(URI *uri) { + if (uri == NULL) return; + + if (uri->scheme != NULL) g_free(uri->scheme); + uri->scheme = NULL; + if (uri->server != NULL) g_free(uri->server); + uri->server = NULL; + if (uri->user != NULL) g_free(uri->user); + uri->user = NULL; + if (uri->path != NULL) g_free(uri->path); + uri->path = NULL; + if (uri->fragment != NULL) g_free(uri->fragment); + uri->fragment = NULL; + if (uri->opaque != NULL) g_free(uri->opaque); + uri->opaque = NULL; + if (uri->authority != NULL) g_free(uri->authority); + uri->authority = NULL; + if (uri->query != NULL) g_free(uri->query); + uri->query = NULL; +} + +/** + * uri_free: + * @uri: pointer to an URI + * + * Free up the URI struct + */ +void +uri_free(URI *uri) { + uri_clean(uri); + g_free(uri); +} + +/************************************************************************ + * * + * Helper functions * + * * + ************************************************************************/ + +/** + * normalize_uri_path: + * @path: pointer to the path string + * + * Applies the 5 normalization steps to a path string--that is, RFC 2396 + * Section 5.2, steps 6.c through 6.g. + * + * Normalization occurs directly on the string, no new allocation is done + * + * Returns 0 or an error code + */ +static int +normalize_uri_path(char *path) { + char *cur, *out; + + if (path == NULL) + return(-1); + + /* Skip all initial "/" chars. We want to get to the beginning of the + * first non-empty segment. + */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* Keep everything we've seen so far. */ + out = cur; + + /* + * Analyze each segment in sequence for cases (c) and (d). + */ + while (cur[0] != '\0') { + /* + * c) All occurrences of "./", where "." is a complete path segment, + * are removed from the buffer string. + */ + if ((cur[0] == '.') && (cur[1] == '/')) { + cur += 2; + /* '//' normalization should be done at this point too */ + while (cur[0] == '/') + cur++; + continue; + } + + /* + * d) If the buffer string ends with "." as a complete path segment, + * that "." is removed. + */ + if ((cur[0] == '.') && (cur[1] == '\0')) + break; + + /* Otherwise keep the segment. */ + while (cur[0] != '/') { + if (cur[0] == '\0') + goto done_cd; + (out++)[0] = (cur++)[0]; + } + /* nomalize // */ + while ((cur[0] == '/') && (cur[1] == '/')) + cur++; + + (out++)[0] = (cur++)[0]; + } + done_cd: + out[0] = '\0'; + + /* Reset to the beginning of the first segment for the next sequence. */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* + * Analyze each segment in sequence for cases (e) and (f). + * + * e) All occurrences of "<segment>/../", where <segment> is a + * complete path segment not equal to "..", are removed from the + * buffer string. Removal of these path segments is performed + * iteratively, removing the leftmost matching pattern on each + * iteration, until no matching pattern remains. + * + * f) If the buffer string ends with "<segment>/..", where <segment> + * is a complete path segment not equal to "..", that + * "<segment>/.." is removed. + * + * To satisfy the "iterative" clause in (e), we need to collapse the + * string every time we find something that needs to be removed. Thus, + * we don't need to keep two pointers into the string: we only need a + * "current position" pointer. + */ + while (1) { + char *segp, *tmp; + + /* At the beginning of each iteration of this loop, "cur" points to + * the first character of the segment we want to examine. + */ + + /* Find the end of the current segment. */ + segp = cur; + while ((segp[0] != '/') && (segp[0] != '\0')) + ++segp; + + /* If this is the last segment, we're done (we need at least two + * segments to meet the criteria for the (e) and (f) cases). + */ + if (segp[0] == '\0') + break; + + /* If the first segment is "..", or if the next segment _isn't_ "..", + * keep this segment and try the next one. + */ + ++segp; + if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) + || ((segp[0] != '.') || (segp[1] != '.') + || ((segp[2] != '/') && (segp[2] != '\0')))) { + cur = segp; + continue; + } + + /* If we get here, remove this segment and the next one and back up + * to the previous segment (if there is one), to implement the + * "iteratively" clause. It's pretty much impossible to back up + * while maintaining two pointers into the buffer, so just compact + * the whole buffer now. + */ + + /* If this is the end of the buffer, we're done. */ + if (segp[2] == '\0') { + cur[0] = '\0'; + break; + } + /* Valgrind complained, strcpy(cur, segp + 3); */ + /* string will overlap, do not use strcpy */ + tmp = cur; + segp += 3; + while ((*tmp++ = *segp++) != 0) + ; + + /* If there are no previous segments, then keep going from here. */ + segp = cur; + while ((segp > path) && ((--segp)[0] == '/')) + ; + if (segp == path) + continue; + + /* "segp" is pointing to the end of a previous segment; find it's + * start. We need to back up to the previous segment and start + * over with that to handle things like "foo/bar/../..". If we + * don't do this, then on the first pass we'll remove the "bar/..", + * but be pointing at the second ".." so we won't realize we can also + * remove the "foo/..". + */ + cur = segp; + while ((cur > path) && (cur[-1] != '/')) + --cur; + } + out[0] = '\0'; + + /* + * g) If the resulting buffer string still begins with one or more + * complete path segments of "..", then the reference is + * considered to be in error. Implementations may handle this + * error by retaining these components in the resolved path (i.e., + * treating them as part of the final URI), by removing them from + * the resolved path (i.e., discarding relative levels above the + * root), or by avoiding traversal of the reference. + * + * We discard them from the final path. + */ + if (path[0] == '/') { + cur = path; + while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') + && ((cur[3] == '/') || (cur[3] == '\0'))) + cur += 3; + + if (cur != path) { + out = path; + while (cur[0] != '\0') + (out++)[0] = (cur++)[0]; + out[0] = 0; + } + } + + return(0); +} + +static int is_hex(char c) { + if (((c >= '0') && (c <= '9')) || + ((c >= 'a') && (c <= 'f')) || + ((c >= 'A') && (c <= 'F'))) + return(1); + return(0); +} + + +/** + * uri_string_unescape: + * @str: the string to unescape + * @len: the length in bytes to unescape (or <= 0 to indicate full string) + * @target: optional destination buffer + * + * Unescaping routine, but does not check that the string is an URI. The + * output is a direct unsigned char translation of %XX values (no encoding) + * Note that the length of the result can only be smaller or same size as + * the input string. + * + * Returns a copy of the string, but unescaped, will return NULL only in case + * of error + */ +char * +uri_string_unescape(const char *str, int len, char *target) { + char *ret, *out; + const char *in; + + if (str == NULL) + return(NULL); + if (len <= 0) len = strlen(str); + if (len < 0) return(NULL); + + if (target == NULL) { + ret = g_malloc(len + 1); + } else + ret = target; + in = str; + out = ret; + while(len > 0) { + if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { + in++; + if ((*in >= '0') && (*in <= '9')) + *out = (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = (*in - 'A') + 10; + in++; + if ((*in >= '0') && (*in <= '9')) + *out = *out * 16 + (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = *out * 16 + (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = *out * 16 + (*in - 'A') + 10; + in++; + len -= 3; + out++; + } else { + *out++ = *in++; + len--; + } + } + *out = 0; + return(ret); +} + +/** + * uri_string_escape: + * @str: string to escape + * @list: exception list string of chars not to escape + * + * This routine escapes a string to hex, ignoring reserved characters (a-z) + * and the characters in the exception list. + * + * Returns a new escaped string or NULL in case of error. + */ +char * +uri_string_escape(const char *str, const char *list) { + char *ret, ch; + char *temp; + const char *in; + int len, out; + + if (str == NULL) + return(NULL); + if (str[0] == 0) + return(g_strdup(str)); + len = strlen(str); + if (!(len > 0)) return(NULL); + + len += 20; + ret = g_malloc(len); + in = str; + out = 0; + while(*in != 0) { + if (len - out <= 3) { + temp = realloc2n(ret, &len); + ret = temp; + } + + ch = *in; + + if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) { + unsigned char val; + ret[out++] = '%'; + val = ch >> 4; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + val = ch & 0xF; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + in++; + } else { + ret[out++] = *in++; + } + + } + ret[out] = 0; + return(ret); +} + +/************************************************************************ + * * + * Public functions * + * * + ************************************************************************/ + +/** + * uri_resolve: + * @URI: the URI instance found in the document + * @base: the base value + * + * Computes he final URI of the reference done by checking that + * the given URI is valid, and building the final URI using the + * base URI. This is processed according to section 5.2 of the + * RFC 2396 + * + * 5.2. Resolving Relative References to Absolute Form + * + * Returns a new URI string (to be freed by the caller) or NULL in case + * of error. + */ +char * +uri_resolve(const char *uri, const char *base) { + char *val = NULL; + int ret, len, indx, cur, out; + URI *ref = NULL; + URI *bas = NULL; + URI *res = NULL; + + /* + * 1) The URI reference is parsed into the potential four components and + * fragment identifier, as described in Section 4.3. + * + * NOTE that a completely empty URI is treated by modern browsers + * as a reference to "." rather than as a synonym for the current + * URI. Should we do that here? + */ + if (uri == NULL) + ret = -1; + else { + if (*uri) { + ref = uri_new(); + if (ref == NULL) + goto done; + ret = uri_parse_into(ref, uri); + } + else + ret = 0; + } + if (ret != 0) + goto done; + if ((ref != NULL) && (ref->scheme != NULL)) { + /* + * The URI is absolute don't modify. + */ + val = g_strdup(uri); + goto done; + } + if (base == NULL) + ret = -1; + else { + bas = uri_new(); + if (bas == NULL) + goto done; + ret = uri_parse_into(bas, base); + } + if (ret != 0) { + if (ref) + val = uri_to_string(ref); + goto done; + } + if (ref == NULL) { + /* + * the base fragment must be ignored + */ + if (bas->fragment != NULL) { + g_free(bas->fragment); + bas->fragment = NULL; + } + val = uri_to_string(bas); + goto done; + } + + /* + * 2) If the path component is empty and the scheme, authority, and + * query components are undefined, then it is a reference to the + * current document and we are done. Otherwise, the reference URI's + * query and fragment components are defined as found (or not found) + * within the URI reference and not inherited from the base URI. + * + * NOTE that in modern browsers, the parsing differs from the above + * in the following aspect: the query component is allowed to be + * defined while still treating this as a reference to the current + * document. + */ + res = uri_new(); + if (res == NULL) + goto done; + if ((ref->scheme == NULL) && (ref->path == NULL) && + ((ref->authority == NULL) && (ref->server == NULL))) { + if (bas->scheme != NULL) + res->scheme = g_strdup(bas->scheme); + if (bas->authority != NULL) + res->authority = g_strdup(bas->authority); + else if (bas->server != NULL) { + res->server = g_strdup(bas->server); + if (bas->user != NULL) + res->user = g_strdup(bas->user); + res->port = bas->port; + } + if (bas->path != NULL) + res->path = g_strdup(bas->path); + if (ref->query != NULL) + res->query = g_strdup (ref->query); + else if (bas->query != NULL) + res->query = g_strdup(bas->query); + if (ref->fragment != NULL) + res->fragment = g_strdup(ref->fragment); + goto step_7; + } + + /* + * 3) If the scheme component is defined, indicating that the reference + * starts with a scheme name, then the reference is interpreted as an + * absolute URI and we are done. Otherwise, the reference URI's + * scheme is inherited from the base URI's scheme component. + */ + if (ref->scheme != NULL) { + val = uri_to_string(ref); + goto done; + } + if (bas->scheme != NULL) + res->scheme = g_strdup(bas->scheme); + + if (ref->query != NULL) + res->query = g_strdup(ref->query); + if (ref->fragment != NULL) + res->fragment = g_strdup(ref->fragment); + + /* + * 4) If the authority component is defined, then the reference is a + * network-path and we skip to step 7. Otherwise, the reference + * URI's authority is inherited from the base URI's authority + * component, which will also be undefined if the URI scheme does not + * use an authority component. + */ + if ((ref->authority != NULL) || (ref->server != NULL)) { + if (ref->authority != NULL) + res->authority = g_strdup(ref->authority); + else { + res->server = g_strdup(ref->server); + if (ref->user != NULL) + res->user = g_strdup(ref->user); + res->port = ref->port; + } + if (ref->path != NULL) + res->path = g_strdup(ref->path); + goto step_7; + } + if (bas->authority != NULL) + res->authority = g_strdup(bas->authority); + else if (bas->server != NULL) { + res->server = g_strdup(bas->server); + if (bas->user != NULL) + res->user = g_strdup(bas->user); + res->port = bas->port; + } + + /* + * 5) If the path component begins with a slash character ("/"), then + * the reference is an absolute-path and we skip to step 7. + */ + if ((ref->path != NULL) && (ref->path[0] == '/')) { + res->path = g_strdup(ref->path); + goto step_7; + } + + + /* + * 6) If this step is reached, then we are resolving a relative-path + * reference. The relative path needs to be merged with the base + * URI's path. Although there are many ways to do this, we will + * describe a simple method using a separate string buffer. + * + * Allocate a buffer large enough for the result string. + */ + len = 2; /* extra / and 0 */ + if (ref->path != NULL) + len += strlen(ref->path); + if (bas->path != NULL) + len += strlen(bas->path); + res->path = g_malloc(len); + res->path[0] = 0; + + /* + * a) All but the last segment of the base URI's path component is + * copied to the buffer. In other words, any characters after the + * last (right-most) slash character, if any, are excluded. + */ + cur = 0; + out = 0; + if (bas->path != NULL) { + while (bas->path[cur] != 0) { + while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) + cur++; + if (bas->path[cur] == 0) + break; + + cur++; + while (out < cur) { + res->path[out] = bas->path[out]; + out++; + } + } + } + res->path[out] = 0; + + /* + * b) The reference's path component is appended to the buffer + * string. + */ + if (ref->path != NULL && ref->path[0] != 0) { + indx = 0; + /* + * Ensure the path includes a '/' + */ + if ((out == 0) && (bas->server != NULL)) + res->path[out++] = '/'; + while (ref->path[indx] != 0) { + res->path[out++] = ref->path[indx++]; + } + } + res->path[out] = 0; + + /* + * Steps c) to h) are really path normalization steps + */ + normalize_uri_path(res->path); + +step_7: + + /* + * 7) The resulting URI components, including any inherited from the + * base URI, are recombined to give the absolute form of the URI + * reference. + */ + val = uri_to_string(res); + +done: + if (ref != NULL) + uri_free(ref); + if (bas != NULL) + uri_free(bas); + if (res != NULL) + uri_free(res); + return(val); +} + +/** + * uri_resolve_relative: + * @URI: the URI reference under consideration + * @base: the base value + * + * Expresses the URI of the reference in terms relative to the + * base. Some examples of this operation include: + * base = "http://site1.com/docs/book1.html" + * URI input URI returned + * docs/pic1.gif pic1.gif + * docs/img/pic1.gif img/pic1.gif + * img/pic1.gif ../img/pic1.gif + * http://site1.com/docs/pic1.gif pic1.gif + * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif + * + * base = "docs/book1.html" + * URI input URI returned + * docs/pic1.gif pic1.gif + * docs/img/pic1.gif img/pic1.gif + * img/pic1.gif ../img/pic1.gif + * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif + * + * + * Note: if the URI reference is really wierd or complicated, it may be + * worthwhile to first convert it into a "nice" one by calling + * uri_resolve (using 'base') before calling this routine, + * since this routine (for reasonable efficiency) assumes URI has + * already been through some validation. + * + * Returns a new URI string (to be freed by the caller) or NULL in case + * error. + */ +char * +uri_resolve_relative (const char *uri, const char * base) +{ + char *val = NULL; + int ret; + int ix; + int pos = 0; + int nbslash = 0; + int len; + URI *ref = NULL; + URI *bas = NULL; + char *bptr, *uptr, *vptr; + int remove_path = 0; + + if ((uri == NULL) || (*uri == 0)) + return NULL; + + /* + * First parse URI into a standard form + */ + ref = uri_new (); + if (ref == NULL) + return NULL; + /* If URI not already in "relative" form */ + if (uri[0] != '.') { + ret = uri_parse_into (ref, uri); + if (ret != 0) + goto done; /* Error in URI, return NULL */ + } else + ref->path = g_strdup(uri); + + /* + * Next parse base into the same standard form + */ + if ((base == NULL) || (*base == 0)) { + val = g_strdup (uri); + goto done; + } + bas = uri_new (); + if (bas == NULL) + goto done; + if (base[0] != '.') { + ret = uri_parse_into (bas, base); + if (ret != 0) + goto done; /* Error in base, return NULL */ + } else + bas->path = g_strdup(base); + + /* + * If the scheme / server on the URI differs from the base, + * just return the URI + */ + if ((ref->scheme != NULL) && + ((bas->scheme == NULL) || + (strcmp (bas->scheme, ref->scheme)) || + (strcmp (bas->server, ref->server)))) { + val = g_strdup (uri); + goto done; + } + if (!strcmp(bas->path, ref->path)) { + val = g_strdup(""); + goto done; + } + if (bas->path == NULL) { + val = g_strdup(ref->path); + goto done; + } + if (ref->path == NULL) { + ref->path = (char *) "/"; + remove_path = 1; + } + + /* + * At this point (at last!) we can compare the two paths + * + * First we take care of the special case where either of the + * two path components may be missing (bug 316224) + */ + if (bas->path == NULL) { + if (ref->path != NULL) { + uptr = ref->path; + if (*uptr == '/') + uptr++; + /* exception characters from uri_to_string */ + val = uri_string_escape(uptr, "/;&=+$,"); + } + goto done; + } + bptr = bas->path; + if (ref->path == NULL) { + for (ix = 0; bptr[ix] != 0; ix++) { + if (bptr[ix] == '/') + nbslash++; + } + uptr = NULL; + len = 1; /* this is for a string terminator only */ + } else { + /* + * Next we compare the two strings and find where they first differ + */ + if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/')) + pos += 2; + if ((*bptr == '.') && (bptr[1] == '/')) + bptr += 2; + else if ((*bptr == '/') && (ref->path[pos] != '/')) + bptr++; + while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) + pos++; + + if (bptr[pos] == ref->path[pos]) { + val = g_strdup(""); + goto done; /* (I can't imagine why anyone would do this) */ + } + + /* + * In URI, "back up" to the last '/' encountered. This will be the + * beginning of the "unique" suffix of URI + */ + ix = pos; + if ((ref->path[ix] == '/') && (ix > 0)) + ix--; + else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/')) + ix -= 2; + for (; ix > 0; ix--) { + if (ref->path[ix] == '/') + break; + } + if (ix == 0) { + uptr = ref->path; + } else { + ix++; + uptr = &ref->path[ix]; + } + + /* + * In base, count the number of '/' from the differing point + */ + if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */ + for (; bptr[ix] != 0; ix++) { + if (bptr[ix] == '/') + nbslash++; + } + } + len = strlen (uptr) + 1; + } + + if (nbslash == 0) { + if (uptr != NULL) + /* exception characters from uri_to_string */ + val = uri_string_escape(uptr, "/;&=+$,"); + goto done; + } + + /* + * Allocate just enough space for the returned string - + * length of the remainder of the URI, plus enough space + * for the "../" groups, plus one for the terminator + */ + val = g_malloc (len + 3 * nbslash); + vptr = val; + /* + * Put in as many "../" as needed + */ + for (; nbslash>0; nbslash--) { + *vptr++ = '.'; + *vptr++ = '.'; + *vptr++ = '/'; + } + /* + * Finish up with the end of the URI + */ + if (uptr != NULL) { + if ((vptr > val) && (len > 0) && + (uptr[0] == '/') && (vptr[-1] == '/')) { + memcpy (vptr, uptr + 1, len - 1); + vptr[len - 2] = 0; + } else { + memcpy (vptr, uptr, len); + vptr[len - 1] = 0; + } + } else { + vptr[len - 1] = 0; + } + + /* escape the freshly-built path */ + vptr = val; + /* exception characters from uri_to_string */ + val = uri_string_escape(vptr, "/;&=+$,"); + g_free(vptr); + +done: + /* + * Free the working variables + */ + if (remove_path != 0) + ref->path = NULL; + if (ref != NULL) + uri_free (ref); + if (bas != NULL) + uri_free (bas); + + return val; +} + +/* + * Utility functions to help parse and assemble query strings. + */ + +struct QueryParams * +query_params_new (int init_alloc) +{ + struct QueryParams *ps; + + if (init_alloc <= 0) init_alloc = 1; + + ps = g_new(QueryParams, 1); + ps->n = 0; + ps->alloc = init_alloc; + ps->p = g_new(QueryParam, ps->alloc); + + return ps; +} + +/* Ensure there is space to store at least one more parameter + * at the end of the set. + */ +static int +query_params_append (struct QueryParams *ps, + const char *name, const char *value) +{ + if (ps->n >= ps->alloc) { + ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2); + ps->alloc *= 2; + } + + ps->p[ps->n].name = g_strdup(name); + ps->p[ps->n].value = value ? g_strdup(value) : NULL; + ps->p[ps->n].ignore = 0; + ps->n++; + + return 0; +} + +void +query_params_free (struct QueryParams *ps) +{ + int i; + + for (i = 0; i < ps->n; ++i) { + g_free (ps->p[i].name); + g_free (ps->p[i].value); + } + g_free (ps->p); + g_free (ps); +} + +struct QueryParams * +query_params_parse (const char *query) +{ + struct QueryParams *ps; + const char *end, *eq; + + ps = query_params_new (0); + if (!query || query[0] == '\0') return ps; + + while (*query) { + char *name = NULL, *value = NULL; + + /* Find the next separator, or end of the string. */ + end = strchr (query, '&'); + if (!end) + end = strchr (query, ';'); + if (!end) + end = query + strlen (query); + + /* Find the first '=' character between here and end. */ + eq = strchr (query, '='); + if (eq && eq >= end) eq = NULL; + + /* Empty section (eg. "&&"). */ + if (end == query) + goto next; + + /* If there is no '=' character, then we have just "name" + * and consistent with CGI.pm we assume value is "". + */ + else if (!eq) { + name = uri_string_unescape (query, end - query, NULL); + value = NULL; + } + /* Or if we have "name=" here (works around annoying + * problem when calling uri_string_unescape with len = 0). + */ + else if (eq+1 == end) { + name = uri_string_unescape (query, eq - query, NULL); + value = g_new0(char, 1); + } + /* If the '=' character is at the beginning then we have + * "=value" and consistent with CGI.pm we _ignore_ this. + */ + else if (query == eq) + goto next; + + /* Otherwise it's "name=value". */ + else { + name = uri_string_unescape (query, eq - query, NULL); + value = uri_string_unescape (eq+1, end - (eq+1), NULL); + } + + /* Append to the parameter set. */ + query_params_append (ps, name, value); + g_free(name); + g_free(value); + + next: + query = end; + if (*query) query ++; /* skip '&' separator */ + } + + return ps; +} @@ -0,0 +1,113 @@ +/** + * Summary: library of generic URI related routines + * Description: library of generic URI related routines + * Implements RFC 2396 + * + * Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of Daniel Veillard shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization from him. + * + * Author: Daniel Veillard + ** + * Copyright (C) 2007 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: + * Richard W.M. Jones <rjones@redhat.com> + * + * Utility functions to help parse and assemble query strings. + */ + +#ifndef QEMU_URI_H +#define QEMU_URI_H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * URI: + * + * A parsed URI reference. This is a struct containing the various fields + * as described in RFC 2396 but separated for further processing. + */ +typedef struct URI { + char *scheme; /* the URI scheme */ + char *opaque; /* opaque part */ + char *authority; /* the authority part */ + char *server; /* the server part */ + char *user; /* the user part */ + int port; /* the port number */ + char *path; /* the path string */ + char *fragment; /* the fragment identifier */ + int cleanup; /* parsing potentially unclean URI */ + char *query; /* the query string (as it appears in the URI) */ +} URI; + +URI *uri_new(void); +char *uri_resolve(const char *URI, const char *base); +char *uri_resolve_relative(const char *URI, const char *base); +URI *uri_parse(const char *str); +URI *uri_parse_raw(const char *str, int raw); +int uri_parse_into(URI *uri, const char *str); +char *uri_to_string(URI *uri); +char *uri_string_escape(const char *str, const char *list); +char *uri_string_unescape(const char *str, int len, char *target); +void uri_free(URI *uri); + +/* Single web service query parameter 'name=value'. */ +typedef struct QueryParam { + char *name; /* Name (unescaped). */ + char *value; /* Value (unescaped). */ + int ignore; /* Ignore this field in qparam_get_query */ +} QueryParam; + +/* Set of parameters. */ +typedef struct QueryParams { + int n; /* number of parameters used */ + int alloc; /* allocated space */ + QueryParam *p; /* array of parameters */ +} QueryParams; + +struct QueryParams *query_params_new (int init_alloc); +int query_param_append (QueryParams *ps, const char *name, const char *value); +extern char *query_param_to_string (const QueryParams *ps); +extern QueryParams *query_params_parse (const char *query); +extern void query_params_free (QueryParams *ps); + +#ifdef __cplusplus +} +#endif +#endif /* QEMU_URI_H */ |