aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.objs5
-rw-r--r--QMP/qmp-events.txt28
-rw-r--r--aio.c14
-rw-r--r--block-migration.c2
-rw-r--r--block.c346
-rw-r--r--block.h25
-rw-r--r--block/Makefile.objs5
-rw-r--r--block/blkdebug.c12
-rw-r--r--block/commit.c268
-rw-r--r--block/gluster.c624
-rw-r--r--block/stream.c29
-rw-r--r--block_int.h178
-rw-r--r--blockdev.c149
-rw-r--r--blockjob.c249
-rw-r--r--blockjob.h243
-rwxr-xr-xconfigure35
-rw-r--r--hmp-commands.hx35
-rw-r--r--hmp.c26
-rw-r--r--hmp.h2
-rw-r--r--hw/fdc.c4
-rw-r--r--hw/ide/core.c22
-rw-r--r--hw/ide/pci.c4
-rw-r--r--hw/scsi-disk.c25
-rw-r--r--hw/scsi-generic.c4
-rw-r--r--hw/virtio-blk.c23
-rw-r--r--monitor.c1
-rw-r--r--monitor.h1
-rw-r--r--qapi-schema.json125
-rw-r--r--qemu-tool.c6
-rw-r--r--qerror.h6
-rw-r--r--qmp-commands.hx20
-rwxr-xr-xtests/qemu-iotests/030260
-rw-r--r--tests/qemu-iotests/030.out4
-rwxr-xr-xtests/qemu-iotests/040178
-rw-r--r--tests/qemu-iotests/040.out5
-rw-r--r--tests/qemu-iotests/group3
-rw-r--r--tests/qemu-iotests/iotests.py15
-rw-r--r--trace-events6
-rw-r--r--uri.c2249
-rw-r--r--uri.h113
40 files changed, 4923 insertions, 426 deletions
diff --git a/Makefile.objs b/Makefile.objs
index 4412757309..b1f3e22547 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -42,7 +42,8 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
# block-obj-y is code used by both qemu system emulation and qemu-img
block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
-block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o
+block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o
+block-obj-y += qemu-progress.o qemu-sockets.o uri.o
block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
@@ -59,7 +60,7 @@ endif
# suppress *all* target specific code in case of system emulation, i.e. a
# single QEMU executable should support all CPUs and machines.
-common-obj-y = $(block-obj-y) blockdev.o
+common-obj-y = $(block-obj-y) blockdev.o block/
common-obj-y += net.o net/
common-obj-y += qom/
common-obj-y += readline.o console.o cursor.o
diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt
index 287805825f..987c5756b3 100644
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -50,7 +50,8 @@ Emitted when a block job has been cancelled.
Data:
-- "type": Job type ("stream" for image streaming, json-string)
+- "type": Job type (json-string; "stream" for image streaming
+ "commit" for block commit)
- "device": Device name (json-string)
- "len": Maximum progress value (json-int)
- "offset": Current progress value (json-int)
@@ -73,7 +74,8 @@ Emitted when a block job has completed.
Data:
-- "type": Job type ("stream" for image streaming, json-string)
+- "type": Job type (json-string; "stream" for image streaming
+ "commit" for block commit)
- "device": Device name (json-string)
- "len": Maximum progress value (json-int)
- "offset": Current progress value (json-int)
@@ -94,6 +96,28 @@ Example:
"speed": 0 },
"timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+BLOCK_JOB_ERROR
+---------------
+
+Emitted when a block job encounters an error.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+ "ignore": error has been ignored, the job may fail later
+ "report": error will be reported and the job canceled
+ "stop": error caused job to be paused
+
+Example:
+
+{ "event": "BLOCK_JOB_ERROR",
+ "data": { "device": "ide0-hd1",
+ "operation": "write",
+ "action": "stop" },
+ "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
DEVICE_TRAY_MOVED
-----------------
diff --git a/aio.c b/aio.c
index 0a9eb10c76..c738a4e15d 100644
--- a/aio.c
+++ b/aio.c
@@ -119,7 +119,7 @@ bool qemu_aio_wait(void)
return true;
}
- walking_handlers = 1;
+ walking_handlers++;
FD_ZERO(&rdfds);
FD_ZERO(&wrfds);
@@ -147,7 +147,7 @@ bool qemu_aio_wait(void)
}
}
- walking_handlers = 0;
+ walking_handlers--;
/* No AIO operations? Get us out of here */
if (!busy) {
@@ -159,14 +159,14 @@ bool qemu_aio_wait(void)
/* if we have any readable fds, dispatch event */
if (ret > 0) {
- walking_handlers = 1;
-
/* we have to walk very carefully in case
* qemu_aio_set_fd_handler is called while we're walking */
node = QLIST_FIRST(&aio_handlers);
while (node) {
AioHandler *tmp;
+ walking_handlers++;
+
if (!node->deleted &&
FD_ISSET(node->fd, &rdfds) &&
node->io_read) {
@@ -181,13 +181,13 @@ bool qemu_aio_wait(void)
tmp = node;
node = QLIST_NEXT(node, node);
- if (tmp->deleted) {
+ walking_handlers--;
+
+ if (!walking_handlers && tmp->deleted) {
QLIST_REMOVE(tmp, node);
g_free(tmp);
}
}
-
- walking_handlers = 0;
}
return true;
diff --git a/block-migration.c b/block-migration.c
index 7def8ab197..ed933017f9 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -519,6 +519,8 @@ static void blk_mig_cleanup(void)
BlkMigDevState *bmds;
BlkMigBlock *blk;
+ bdrv_drain_all();
+
set_dirty_tracking(0);
while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
diff --git a/block.c b/block.c
index 751ebdc06b..c108a76952 100644
--- a/block.c
+++ b/block.c
@@ -26,8 +26,10 @@
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
+#include "blockjob.h"
#include "module.h"
#include "qjson.h"
+#include "sysemu.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"
@@ -1386,7 +1388,8 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
}
void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
- BlockQMPEventAction action, int is_read)
+ enum MonitorEvent ev,
+ BlockErrorAction action, bool is_read)
{
QObject *data;
const char *action_str;
@@ -1409,7 +1412,7 @@ void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
bdrv->device_name,
action_str,
is_read ? "read" : "write");
- monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
+ monitor_protocol_event(ev, data);
qobject_decref(data);
}
@@ -1724,6 +1727,149 @@ int bdrv_change_backing_file(BlockDriverState *bs,
return ret;
}
+/*
+ * Finds the image layer in the chain that has 'bs' as its backing file.
+ *
+ * active is the current topmost image.
+ *
+ * Returns NULL if bs is not found in active's image chain,
+ * or if active == bs.
+ */
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs)
+{
+ BlockDriverState *overlay = NULL;
+ BlockDriverState *intermediate;
+
+ assert(active != NULL);
+ assert(bs != NULL);
+
+ /* if bs is the same as active, then by definition it has no overlay
+ */
+ if (active == bs) {
+ return NULL;
+ }
+
+ intermediate = active;
+ while (intermediate->backing_hd) {
+ if (intermediate->backing_hd == bs) {
+ overlay = intermediate;
+ break;
+ }
+ intermediate = intermediate->backing_hd;
+ }
+
+ return overlay;
+}
+
+typedef struct BlkIntermediateStates {
+ BlockDriverState *bs;
+ QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
+} BlkIntermediateStates;
+
+
+/*
+ * Drops images above 'base' up to and including 'top', and sets the image
+ * above 'top' to have base as its backing file.
+ *
+ * Requires that the overlay to 'top' is opened r/w, so that the backing file
+ * information in 'bs' can be properly updated.
+ *
+ * E.g., this will convert the following chain:
+ * bottom <- base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * bottom <- base <- active
+ *
+ * It is allowed for bottom==base, in which case it converts:
+ *
+ * base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * base <- active
+ *
+ * Error conditions:
+ * if active == top, that is considered an error
+ *
+ */
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+ BlockDriverState *base)
+{
+ BlockDriverState *intermediate;
+ BlockDriverState *base_bs = NULL;
+ BlockDriverState *new_top_bs = NULL;
+ BlkIntermediateStates *intermediate_state, *next;
+ int ret = -EIO;
+
+ QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
+ QSIMPLEQ_INIT(&states_to_delete);
+
+ if (!top->drv || !base->drv) {
+ goto exit;
+ }
+
+ new_top_bs = bdrv_find_overlay(active, top);
+
+ if (new_top_bs == NULL) {
+ /* we could not find the image above 'top', this is an error */
+ goto exit;
+ }
+
+ /* special case of new_top_bs->backing_hd already pointing to base - nothing
+ * to do, no intermediate images */
+ if (new_top_bs->backing_hd == base) {
+ ret = 0;
+ goto exit;
+ }
+
+ intermediate = top;
+
+ /* now we will go down through the list, and add each BDS we find
+ * into our deletion queue, until we hit the 'base'
+ */
+ while (intermediate) {
+ intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
+ intermediate_state->bs = intermediate;
+ QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
+
+ if (intermediate->backing_hd == base) {
+ base_bs = intermediate->backing_hd;
+ break;
+ }
+ intermediate = intermediate->backing_hd;
+ }
+ if (base_bs == NULL) {
+ /* something went wrong, we did not end at the base. safely
+ * unravel everything, and exit with error */
+ goto exit;
+ }
+
+ /* success - we can delete the intermediate states, and link top->base */
+ ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
+ base_bs->drv ? base_bs->drv->format_name : "");
+ if (ret) {
+ goto exit;
+ }
+ new_top_bs->backing_hd = base_bs;
+
+
+ QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+ /* so that bdrv_close() does not recursively close the chain */
+ intermediate_state->bs->backing_hd = NULL;
+ bdrv_delete(intermediate_state->bs);
+ }
+ ret = 0;
+
+exit:
+ QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+ g_free(intermediate_state);
+ }
+ return ret;
+}
+
+
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
size_t size)
{
@@ -2330,18 +2476,51 @@ void bdrv_set_io_limits(BlockDriverState *bs,
bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}
-void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
- BlockErrorAction on_write_error)
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error)
{
bs->on_read_error = on_read_error;
bs->on_write_error = on_write_error;
}
-BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
{
return is_read ? bs->on_read_error : bs->on_write_error;
}
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
+{
+ BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
+
+ switch (on_err) {
+ case BLOCKDEV_ON_ERROR_ENOSPC:
+ return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_STOP:
+ return BDRV_ACTION_STOP;
+ case BLOCKDEV_ON_ERROR_REPORT:
+ return BDRV_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_IGNORE:
+ return BDRV_ACTION_IGNORE;
+ default:
+ abort();
+ }
+}
+
+/* This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+ bool is_read, int error)
+{
+ assert(error >= 0);
+ bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
+ if (action == BDRV_ACTION_STOP) {
+ vm_stop(RUN_STATE_IO_ERROR);
+ bdrv_iostatus_set_err(bs, error);
+ }
+}
+
int bdrv_is_read_only(BlockDriverState *bs)
{
return bs->read_only;
@@ -2974,6 +3153,22 @@ int bdrv_get_backing_file_depth(BlockDriverState *bs)
return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
}
+BlockDriverState *bdrv_find_base(BlockDriverState *bs)
+{
+ BlockDriverState *curr_bs = NULL;
+
+ if (!bs) {
+ return NULL;
+ }
+
+ curr_bs = bs;
+
+ while (curr_bs->backing_hd) {
+ curr_bs = curr_bs->backing_hd;
+ }
+ return curr_bs;
+}
+
#define NB_SUFFIXES 4
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
@@ -4049,9 +4244,9 @@ void bdrv_iostatus_enable(BlockDriverState *bs)
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
return (bs->iostatus_enabled &&
- (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
- bs->on_write_error == BLOCK_ERR_STOP_ANY ||
- bs->on_read_error == BLOCK_ERR_STOP_ANY));
+ (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+ bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
+ bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}
void bdrv_iostatus_disable(BlockDriverState *bs)
@@ -4066,14 +4261,10 @@ void bdrv_iostatus_reset(BlockDriverState *bs)
}
}
-/* XXX: Today this is set by device models because it makes the implementation
- quite simple. However, the block layer knows about the error, so it's
- possible to implement this without device models being involved */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
- if (bdrv_iostatus_is_enabled(bs) &&
- bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
- assert(error >= 0);
+ assert(bdrv_iostatus_is_enabled(bs));
+ if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
BLOCK_DEVICE_IO_STATUS_FAILED;
}
@@ -4247,130 +4438,3 @@ out:
return ret;
}
-
-void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
- int64_t speed, BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp)
-{
- BlockJob *job;
-
- if (bs->job || bdrv_in_use(bs)) {
- error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
- return NULL;
- }
- bdrv_set_in_use(bs, 1);
-
- job = g_malloc0(job_type->instance_size);
- job->job_type = job_type;
- job->bs = bs;
- job->cb = cb;
- job->opaque = opaque;
- job->busy = true;
- bs->job = job;
-
- /* Only set speed when necessary to avoid NotSupported error */
- if (speed != 0) {
- Error *local_err = NULL;
-
- block_job_set_speed(job, speed, &local_err);
- if (error_is_set(&local_err)) {
- bs->job = NULL;
- g_free(job);
- bdrv_set_in_use(bs, 0);
- error_propagate(errp, local_err);
- return NULL;
- }
- }
- return job;
-}
-
-void block_job_complete(BlockJob *job, int ret)
-{
- BlockDriverState *bs = job->bs;
-
- assert(bs->job == job);
- job->cb(job->opaque, ret);
- bs->job = NULL;
- g_free(job);
- bdrv_set_in_use(bs, 0);
-}
-
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
- Error *local_err = NULL;
-
- if (!job->job_type->set_speed) {
- error_set(errp, QERR_NOT_SUPPORTED);
- return;
- }
- job->job_type->set_speed(job, speed, &local_err);
- if (error_is_set(&local_err)) {
- error_propagate(errp, local_err);
- return;
- }
-
- job->speed = speed;
-}
-
-void block_job_cancel(BlockJob *job)
-{
- job->cancelled = true;
- if (job->co && !job->busy) {
- qemu_coroutine_enter(job->co, NULL);
- }
-}
-
-bool block_job_is_cancelled(BlockJob *job)
-{
- return job->cancelled;
-}
-
-struct BlockCancelData {
- BlockJob *job;
- BlockDriverCompletionFunc *cb;
- void *opaque;
- bool cancelled;
- int ret;
-};
-
-static void block_job_cancel_cb(void *opaque, int ret)
-{
- struct BlockCancelData *data = opaque;
-
- data->cancelled = block_job_is_cancelled(data->job);
- data->ret = ret;
- data->cb(data->opaque, ret);
-}
-
-int block_job_cancel_sync(BlockJob *job)
-{
- struct BlockCancelData data;
- BlockDriverState *bs = job->bs;
-
- assert(bs->job == job);
-
- /* Set up our own callback to store the result and chain to
- * the original callback.
- */
- data.job = job;
- data.cb = job->cb;
- data.opaque = job->opaque;
- data.ret = -EINPROGRESS;
- job->cb = block_job_cancel_cb;
- job->opaque = &data;
- block_job_cancel(job);
- while (data.ret == -EINPROGRESS) {
- qemu_aio_wait();
- }
- return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
-}
-
-void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
-{
- /* Check cancellation *before* setting busy = false, too! */
- if (!block_job_is_cancelled(job)) {
- job->busy = false;
- co_sleep_ns(clock, ns);
- job->busy = true;
- }
-}
diff --git a/block.h b/block.h
index b1095d8599..e2d89d7bc1 100644
--- a/block.h
+++ b/block.h
@@ -6,9 +6,11 @@
#include "qemu-option.h"
#include "qemu-coroutine.h"
#include "qobject.h"
+#include "qapi-types.h"
/* block.c */
typedef struct BlockDriver BlockDriver;
+typedef struct BlockJob BlockJob;
typedef struct BlockDriverInfo {
/* in bytes, 0 if irrelevant */
@@ -89,13 +91,8 @@ typedef struct BlockDevOps {
#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1)
typedef enum {
- BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC,
- BLOCK_ERR_STOP_ANY
-} BlockErrorAction;
-
-typedef enum {
BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
-} BlockQMPEventAction;
+} BlockErrorAction;
typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
@@ -111,8 +108,6 @@ void bdrv_iostatus_reset(BlockDriverState *bs);
void bdrv_iostatus_disable(BlockDriverState *bs);
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
-void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
- BlockQMPEventAction action, int is_read);
void bdrv_info_print(Monitor *mon, const QObject *data);
void bdrv_info(Monitor *mon, QObject **ret_data);
void bdrv_stats_print(Monitor *mon, const QObject *data);
@@ -203,6 +198,11 @@ int bdrv_commit_all(void);
int bdrv_change_backing_file(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt);
void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+ BlockDriverState *base);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
typedef struct BdrvCheckResult {
@@ -277,9 +277,12 @@ int bdrv_has_zero_init(BlockDriverState *bs);
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
int *pnum);
-void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
- BlockErrorAction on_write_error);
-BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read);
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error);
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+ bool is_read, int error);
int bdrv_is_read_only(BlockDriverState *bs);
int bdrv_is_sg(BlockDriverState *bs);
int bdrv_enable_write_cache(BlockDriverState *bs);
diff --git a/block/Makefile.objs b/block/Makefile.objs
index b5754d39bf..554f429d05 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -3,9 +3,12 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
-block-obj-y += stream.o
block-obj-$(CONFIG_WIN32) += raw-win32.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LIBISCSI) += iscsi.o
block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
+block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+
+common-obj-y += stream.o
+common-obj-y += commit.o
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 59dcea0650..1206d5256b 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -28,6 +28,7 @@
typedef struct BDRVBlkdebugState {
int state;
+ int new_state;
QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
} BDRVBlkdebugState;
@@ -403,12 +404,12 @@ static void blkdebug_close(BlockDriverState *bs)
}
static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
- int old_state, bool injected)
+ bool injected)
{
BDRVBlkdebugState *s = bs->opaque;
/* Only process rules for the current state */
- if (rule->state && rule->state != old_state) {
+ if (rule->state && rule->state != s->state) {
return injected;
}
@@ -423,7 +424,7 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
break;
case ACTION_SET_STATE:
- s->state = rule->options.set_state.new_state;
+ s->new_state = rule->options.set_state.new_state;
break;
}
return injected;
@@ -433,15 +434,16 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
BDRVBlkdebugState *s = bs->opaque;
struct BlkdebugRule *rule;
- int old_state = s->state;
bool injected;
assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);
injected = false;
+ s->new_state = s->state;
QLIST_FOREACH(rule, &s->rules[event], next) {
- injected = process_rule(bs, rule, old_state, injected);
+ injected = process_rule(bs, rule, injected);
}
+ s->state = s->new_state;
}
static int64_t blkdebug_getlength(BlockDriverState *bs)
diff --git a/block/commit.c b/block/commit.c
new file mode 100644
index 0000000000..733c91403c
--- /dev/null
+++ b/block/commit.c
@@ -0,0 +1,268 @@
+/*
+ * Live block commit
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ * Jeff Cody <jcody@redhat.com>
+ * Based on stream.c by Stefan Hajnoczi
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "block_int.h"
+#include "blockjob.h"
+#include "qemu/ratelimit.h"
+
+enum {
+ /*
+ * Size of data buffer for populating the image file. This should be large
+ * enough to process multiple clusters in a single call, so that populating
+ * contiguous regions of the image is efficient.
+ */
+ COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
+};
+
+#define SLICE_TIME 100000000ULL /* ns */
+
+typedef struct CommitBlockJob {
+ BlockJob common;
+ RateLimit limit;
+ BlockDriverState *active;
+ BlockDriverState *top;
+ BlockDriverState *base;
+ BlockdevOnError on_error;
+ int base_flags;
+ int orig_overlay_flags;
+} CommitBlockJob;
+
+static int coroutine_fn commit_populate(BlockDriverState *bs,
+ BlockDriverState *base,
+ int64_t sector_num, int nb_sectors,
+ void *buf)
+{
+ int ret = 0;
+
+ ret = bdrv_read(bs, sector_num, buf, nb_sectors);
+ if (ret) {
+ return ret;
+ }
+
+ ret = bdrv_write(base, sector_num, buf, nb_sectors);
+ if (ret) {
+ return ret;
+ }
+
+ return 0;
+}
+
+static void coroutine_fn commit_run(void *opaque)
+{
+ CommitBlockJob *s = opaque;
+ BlockDriverState *active = s->active;
+ BlockDriverState *top = s->top;
+ BlockDriverState *base = s->base;
+ BlockDriverState *overlay_bs = NULL;
+ int64_t sector_num, end;
+ int ret = 0;
+ int n = 0;
+ void *buf;
+ int bytes_written = 0;
+ int64_t base_len;
+
+ ret = s->common.len = bdrv_getlength(top);
+
+
+ if (s->common.len < 0) {
+ goto exit_restore_reopen;
+ }
+
+ ret = base_len = bdrv_getlength(base);
+ if (base_len < 0) {
+ goto exit_restore_reopen;
+ }
+
+ if (base_len < s->common.len) {
+ ret = bdrv_truncate(base, s->common.len);
+ if (ret) {
+ goto exit_restore_reopen;
+ }
+ }
+
+ overlay_bs = bdrv_find_overlay(active, top);
+
+ end = s->common.len >> BDRV_SECTOR_BITS;
+ buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
+
+ for (sector_num = 0; sector_num < end; sector_num += n) {
+ uint64_t delay_ns = 0;
+ bool copy;
+
+wait:
+ /* Note that even when no rate limit is applied we need to yield
+ * with no pending I/O here so that qemu_aio_flush() returns.
+ */
+ block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+ if (block_job_is_cancelled(&s->common)) {
+ break;
+ }
+ /* Copy if allocated above the base */
+ ret = bdrv_co_is_allocated_above(top, base, sector_num,
+ COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
+ &n);
+ copy = (ret == 1);
+ trace_commit_one_iteration(s, sector_num, n, ret);
+ if (copy) {
+ if (s->common.speed) {
+ delay_ns = ratelimit_calculate_delay(&s->limit, n);
+ if (delay_ns > 0) {
+ goto wait;
+ }
+ }
+ ret = commit_populate(top, base, sector_num, n, buf);
+ bytes_written += n * BDRV_SECTOR_SIZE;
+ }
+ if (ret < 0) {
+ if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
+ s->on_error == BLOCKDEV_ON_ERROR_REPORT||
+ (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
+ goto exit_free_buf;
+ } else {
+ n = 0;
+ continue;
+ }
+ }
+ /* Publish progress */
+ s->common.offset += n * BDRV_SECTOR_SIZE;
+ }
+
+ ret = 0;
+
+ if (!block_job_is_cancelled(&s->common) && sector_num == end) {
+ /* success */
+ ret = bdrv_drop_intermediate(active, top, base);
+ }
+
+exit_free_buf:
+ qemu_vfree(buf);
+
+exit_restore_reopen:
+ /* restore base open flags here if appropriate (e.g., change the base back
+ * to r/o). These reopens do not need to be atomic, since we won't abort
+ * even on failure here */
+ if (s->base_flags != bdrv_get_flags(base)) {
+ bdrv_reopen(base, s->base_flags, NULL);
+ }
+ if (s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
+ bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
+ }
+
+ block_job_complete(&s->common, ret);
+}
+
+static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+ CommitBlockJob *s = container_of(job, CommitBlockJob, common);
+
+ if (speed < 0) {
+ error_set(errp, QERR_INVALID_PARAMETER, "speed");
+ return;
+ }
+ ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+}
+
+static BlockJobType commit_job_type = {
+ .instance_size = sizeof(CommitBlockJob),
+ .job_type = "commit",
+ .set_speed = commit_set_speed,
+};
+
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+ BlockDriverState *top, int64_t speed,
+ BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp)
+{
+ CommitBlockJob *s;
+ BlockReopenQueue *reopen_queue = NULL;
+ int orig_overlay_flags;
+ int orig_base_flags;
+ BlockDriverState *overlay_bs;
+ Error *local_err = NULL;
+
+ if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
+ on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
+ !bdrv_iostatus_is_enabled(bs)) {
+ error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
+ return;
+ }
+
+ /* Once we support top == active layer, remove this check */
+ if (top == bs) {
+ error_setg(errp,
+ "Top image as the active layer is currently unsupported");
+ return;
+ }
+
+ if (top == base) {
+ error_setg(errp, "Invalid files for merge: top and base are the same");
+ return;
+ }
+
+ /* top and base may be valid, but let's make sure that base is reachable
+ * from top */
+ if (bdrv_find_backing_image(top, base->filename) != base) {
+ error_setg(errp,
+ "Base (%s) is not reachable from top (%s)",
+ base->filename, top->filename);
+ return;
+ }
+
+ overlay_bs = bdrv_find_overlay(bs, top);
+
+ if (overlay_bs == NULL) {
+ error_setg(errp, "Could not find overlay image for %s:", top->filename);
+ return;
+ }
+
+ orig_base_flags = bdrv_get_flags(base);
+ orig_overlay_flags = bdrv_get_flags(overlay_bs);
+
+ /* convert base & overlay_bs to r/w, if necessary */
+ if (!(orig_base_flags & BDRV_O_RDWR)) {
+ reopen_queue = bdrv_reopen_queue(reopen_queue, base,
+ orig_base_flags | BDRV_O_RDWR);
+ }
+ if (!(orig_overlay_flags & BDRV_O_RDWR)) {
+ reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
+ orig_overlay_flags | BDRV_O_RDWR);
+ }
+ if (reopen_queue) {
+ bdrv_reopen_multiple(reopen_queue, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+
+
+ s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp);
+ if (!s) {
+ return;
+ }
+
+ s->base = base;
+ s->top = top;
+ s->active = bs;
+
+ s->base_flags = orig_base_flags;
+ s->orig_overlay_flags = orig_overlay_flags;
+
+ s->on_error = on_error;
+ s->common.co = qemu_coroutine_create(commit_run);
+
+ trace_commit_start(bs, base, top, s, s->common.co, opaque);
+ qemu_coroutine_enter(s->common.co, s);
+}
diff --git a/block/gluster.c b/block/gluster.c
new file mode 100644
index 0000000000..3588d7377f
--- /dev/null
+++ b/block/gluster.c
@@ -0,0 +1,624 @@
+/*
+ * GlusterFS backend for QEMU
+ *
+ * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
+ *
+ * Pipe handling mechanism in AIO implementation is derived from
+ * block/rbd.c. Hence,
+ *
+ * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
+ * Josh Durgin <josh.durgin@dreamhost.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#include <glusterfs/api/glfs.h>
+#include "block_int.h"
+#include "qemu_socket.h"
+#include "uri.h"
+
+typedef struct GlusterAIOCB {
+ BlockDriverAIOCB common;
+ int64_t size;
+ int ret;
+ bool *finished;
+ QEMUBH *bh;
+} GlusterAIOCB;
+
+typedef struct BDRVGlusterState {
+ struct glfs *glfs;
+ int fds[2];
+ struct glfs_fd *fd;
+ int qemu_aio_count;
+ int event_reader_pos;
+ GlusterAIOCB *event_acb;
+} BDRVGlusterState;
+
+#define GLUSTER_FD_READ 0
+#define GLUSTER_FD_WRITE 1
+
+typedef struct GlusterConf {
+ char *server;
+ int port;
+ char *volname;
+ char *image;
+ char *transport;
+} GlusterConf;
+
+static void qemu_gluster_gconf_free(GlusterConf *gconf)
+{
+ g_free(gconf->server);
+ g_free(gconf->volname);
+ g_free(gconf->image);
+ g_free(gconf->transport);
+ g_free(gconf);
+}
+
+static int parse_volume_options(GlusterConf *gconf, char *path)
+{
+ char *p, *q;
+
+ if (!path) {
+ return -EINVAL;
+ }
+
+ /* volume */
+ p = q = path + strspn(path, "/");
+ p += strcspn(p, "/");
+ if (*p == '\0') {
+ return -EINVAL;
+ }
+ gconf->volname = g_strndup(q, p - q);
+
+ /* image */
+ p += strspn(p, "/");
+ if (*p == '\0') {
+ return -EINVAL;
+ }
+ gconf->image = g_strdup(p);
+ return 0;
+}
+
+/*
+ * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
+ *
+ * 'gluster' is the protocol.
+ *
+ * 'transport' specifies the transport type used to connect to gluster
+ * management daemon (glusterd). Valid transport types are
+ * tcp, unix and rdma. If a transport type isn't specified, then tcp
+ * type is assumed.
+ *
+ * 'server' specifies the server where the volume file specification for
+ * the given volume resides. This can be either hostname, ipv4 address
+ * or ipv6 address. ipv6 address needs to be within square brackets [ ].
+ * If transport type is 'unix', then 'server' field should not be specifed.
+ * The 'socket' field needs to be populated with the path to unix domain
+ * socket.
+ *
+ * 'port' is the port number on which glusterd is listening. This is optional
+ * and if not specified, QEMU will send 0 which will make gluster to use the
+ * default port. If the transport type is unix, then 'port' should not be
+ * specified.
+ *
+ * 'volname' is the name of the gluster volume which contains the VM image.
+ *
+ * 'image' is the path to the actual VM image that resides on gluster volume.
+ *
+ * Examples:
+ *
+ * file=gluster://1.2.3.4/testvol/a.img
+ * file=gluster+tcp://1.2.3.4/testvol/a.img
+ * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
+ * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
+ * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
+ * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
+ * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
+ * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
+ */
+static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
+{
+ URI *uri;
+ QueryParams *qp = NULL;
+ bool is_unix = false;
+ int ret = 0;
+
+ uri = uri_parse(filename);
+ if (!uri) {
+ return -EINVAL;
+ }
+
+ /* transport */
+ if (!strcmp(uri->scheme, "gluster")) {
+ gconf->transport = g_strdup("tcp");
+ } else if (!strcmp(uri->scheme, "gluster+tcp")) {
+ gconf->transport = g_strdup("tcp");
+ } else if (!strcmp(uri->scheme, "gluster+unix")) {
+ gconf->transport = g_strdup("unix");
+ is_unix = true;
+ } else if (!strcmp(uri->scheme, "gluster+rdma")) {
+ gconf->transport = g_strdup("rdma");
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = parse_volume_options(gconf, uri->path);
+ if (ret < 0) {
+ goto out;
+ }
+
+ qp = query_params_parse(uri->query);
+ if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (is_unix) {
+ if (uri->server || uri->port) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (strcmp(qp->p[0].name, "socket")) {
+ ret = -EINVAL;
+ goto out;
+ }
+ gconf->server = g_strdup(qp->p[0].value);
+ } else {
+ gconf->server = g_strdup(uri->server);
+ gconf->port = uri->port;
+ }
+
+out:
+ if (qp) {
+ query_params_free(qp);
+ }
+ uri_free(uri);
+ return ret;
+}
+
+static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)
+{
+ struct glfs *glfs = NULL;
+ int ret;
+ int old_errno;
+
+ ret = qemu_gluster_parseuri(gconf, filename);
+ if (ret < 0) {
+ error_report("Usage: file=gluster[+transport]://[server[:port]]/"
+ "volname/image[?socket=...]");
+ errno = -ret;
+ goto out;
+ }
+
+ glfs = glfs_new(gconf->volname);
+ if (!glfs) {
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
+ gconf->port);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /*
+ * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when
+ * GlusterFS makes GF_LOG_* macros available to libgfapi users.
+ */
+ ret = glfs_set_logging(glfs, "-", 4);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = glfs_init(glfs);
+ if (ret) {
+ error_report("Gluster connection failed for server=%s port=%d "
+ "volume=%s image=%s transport=%s\n", gconf->server, gconf->port,
+ gconf->volname, gconf->image, gconf->transport);
+ goto out;
+ }
+ return glfs;
+
+out:
+ if (glfs) {
+ old_errno = errno;
+ glfs_fini(glfs);
+ errno = old_errno;
+ }
+ return NULL;
+}
+
+static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
+{
+ int ret;
+ bool *finished = acb->finished;
+ BlockDriverCompletionFunc *cb = acb->common.cb;
+ void *opaque = acb->common.opaque;
+
+ if (!acb->ret || acb->ret == acb->size) {
+ ret = 0; /* Success */
+ } else if (acb->ret < 0) {
+ ret = acb->ret; /* Read/Write failed */
+ } else {
+ ret = -EIO; /* Partial read/write - fail it */
+ }
+
+ s->qemu_aio_count--;
+ qemu_aio_release(acb);
+ cb(opaque, ret);
+ if (finished) {
+ *finished = true;
+ }
+}
+
+static void qemu_gluster_aio_event_reader(void *opaque)
+{
+ BDRVGlusterState *s = opaque;
+ ssize_t ret;
+
+ do {
+ char *p = (char *)&s->event_acb;
+
+ ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
+ sizeof(s->event_acb) - s->event_reader_pos);
+ if (ret > 0) {
+ s->event_reader_pos += ret;
+ if (s->event_reader_pos == sizeof(s->event_acb)) {
+ s->event_reader_pos = 0;
+ qemu_gluster_complete_aio(s->event_acb, s);
+ }
+ }
+ } while (ret < 0 && errno == EINTR);
+}
+
+static int qemu_gluster_aio_flush_cb(void *opaque)
+{
+ BDRVGlusterState *s = opaque;
+
+ return (s->qemu_aio_count > 0);
+}
+
+static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
+ int bdrv_flags)
+{
+ BDRVGlusterState *s = bs->opaque;
+ int open_flags = O_BINARY;
+ int ret = 0;
+ GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
+
+ s->glfs = qemu_gluster_init(gconf, filename);
+ if (!s->glfs) {
+ ret = -errno;
+ goto out;
+ }
+
+ if (bdrv_flags & BDRV_O_RDWR) {
+ open_flags |= O_RDWR;
+ } else {
+ open_flags |= O_RDONLY;
+ }
+
+ if ((bdrv_flags & BDRV_O_NOCACHE)) {
+ open_flags |= O_DIRECT;
+ }
+
+ s->fd = glfs_open(s->glfs, gconf->image, open_flags);
+ if (!s->fd) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = qemu_pipe(s->fds);
+ if (ret < 0) {
+ ret = -errno;
+ goto out;
+ }
+ fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
+ qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
+ qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
+
+out:
+ qemu_gluster_gconf_free(gconf);
+ if (!ret) {
+ return ret;
+ }
+ if (s->fd) {
+ glfs_close(s->fd);
+ }
+ if (s->glfs) {
+ glfs_fini(s->glfs);
+ }
+ return ret;
+}
+
+static int qemu_gluster_create(const char *filename,
+ QEMUOptionParameter *options)
+{
+ struct glfs *glfs;
+ struct glfs_fd *fd;
+ int ret = 0;
+ int64_t total_size = 0;
+ GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
+
+ glfs = qemu_gluster_init(gconf, filename);
+ if (!glfs) {
+ ret = -errno;
+ goto out;
+ }
+
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ total_size = options->value.n / BDRV_SECTOR_SIZE;
+ }
+ options++;
+ }
+
+ fd = glfs_creat(glfs, gconf->image,
+ O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
+ if (!fd) {
+ ret = -errno;
+ } else {
+ if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+ ret = -errno;
+ }
+ if (glfs_close(fd) != 0) {
+ ret = -errno;
+ }
+ }
+out:
+ qemu_gluster_gconf_free(gconf);
+ if (glfs) {
+ glfs_fini(glfs);
+ }
+ return ret;
+}
+
+static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+ GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
+ bool finished = false;
+
+ acb->finished = &finished;
+ while (!finished) {
+ qemu_aio_wait();
+ }
+}
+
+static AIOPool gluster_aio_pool = {
+ .aiocb_size = sizeof(GlusterAIOCB),
+ .cancel = qemu_gluster_aio_cancel,
+};
+
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
+{
+ GlusterAIOCB *acb = (GlusterAIOCB *)arg;
+ BlockDriverState *bs = acb->common.bs;
+ BDRVGlusterState *s = bs->opaque;
+ int retval;
+
+ acb->ret = ret;
+ retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
+ if (retval != sizeof(acb)) {
+ /*
+ * Gluster AIO callback thread failed to notify the waiting
+ * QEMU thread about IO completion.
+ *
+ * Complete this IO request and make the disk inaccessible for
+ * subsequent reads and writes.
+ */
+ error_report("Gluster failed to notify QEMU about IO completion");
+
+ qemu_mutex_lock_iothread(); /* We are in gluster thread context */
+ acb->common.cb(acb->common.opaque, -EIO);
+ qemu_aio_release(acb);
+ s->qemu_aio_count--;
+ close(s->fds[GLUSTER_FD_READ]);
+ close(s->fds[GLUSTER_FD_WRITE]);
+ qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
+ NULL);
+ bs->drv = NULL; /* Make the disk inaccessible */
+ qemu_mutex_unlock_iothread();
+ }
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque, int write)
+{
+ int ret;
+ GlusterAIOCB *acb;
+ BDRVGlusterState *s = bs->opaque;
+ size_t size;
+ off_t offset;
+
+ offset = sector_num * BDRV_SECTOR_SIZE;
+ size = nb_sectors * BDRV_SECTOR_SIZE;
+ s->qemu_aio_count++;
+
+ acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+ acb->size = size;
+ acb->ret = 0;
+ acb->finished = NULL;
+
+ if (write) {
+ ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+ &gluster_finish_aiocb, acb);
+ } else {
+ ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+ &gluster_finish_aiocb, acb);
+ }
+
+ if (ret < 0) {
+ goto out;
+ }
+ return &acb->common;
+
+out:
+ s->qemu_aio_count--;
+ qemu_aio_release(acb);
+ return NULL;
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ int ret;
+ GlusterAIOCB *acb;
+ BDRVGlusterState *s = bs->opaque;
+
+ acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+ acb->size = 0;
+ acb->ret = 0;
+ acb->finished = NULL;
+ s->qemu_aio_count++;
+
+ ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
+ if (ret < 0) {
+ goto out;
+ }
+ return &acb->common;
+
+out:
+ s->qemu_aio_count--;
+ qemu_aio_release(acb);
+ return NULL;
+}
+
+static int64_t qemu_gluster_getlength(BlockDriverState *bs)
+{
+ BDRVGlusterState *s = bs->opaque;
+ int64_t ret;
+
+ ret = glfs_lseek(s->fd, 0, SEEK_END);
+ if (ret < 0) {
+ return -errno;
+ } else {
+ return ret;
+ }
+}
+
+static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
+{
+ BDRVGlusterState *s = bs->opaque;
+ struct stat st;
+ int ret;
+
+ ret = glfs_fstat(s->fd, &st);
+ if (ret < 0) {
+ return -errno;
+ } else {
+ return st.st_blocks * 512;
+ }
+}
+
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+ BDRVGlusterState *s = bs->opaque;
+
+ close(s->fds[GLUSTER_FD_READ]);
+ close(s->fds[GLUSTER_FD_WRITE]);
+ qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);
+
+ if (s->fd) {
+ glfs_close(s->fd);
+ s->fd = NULL;
+ }
+ glfs_fini(s->glfs);
+}
+
+static QEMUOptionParameter qemu_gluster_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { NULL }
+};
+
+static BlockDriver bdrv_gluster = {
+ .format_name = "gluster",
+ .protocol_name = "gluster",
+ .instance_size = sizeof(BDRVGlusterState),
+ .bdrv_file_open = qemu_gluster_open,
+ .bdrv_close = qemu_gluster_close,
+ .bdrv_create = qemu_gluster_create,
+ .bdrv_getlength = qemu_gluster_getlength,
+ .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+ .bdrv_aio_readv = qemu_gluster_aio_readv,
+ .bdrv_aio_writev = qemu_gluster_aio_writev,
+ .bdrv_aio_flush = qemu_gluster_aio_flush,
+ .create_options = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_tcp = {
+ .format_name = "gluster",
+ .protocol_name = "gluster+tcp",
+ .instance_size = sizeof(BDRVGlusterState),
+ .bdrv_file_open = qemu_gluster_open,
+ .bdrv_close = qemu_gluster_close,
+ .bdrv_create = qemu_gluster_create,
+ .bdrv_getlength = qemu_gluster_getlength,
+ .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+ .bdrv_aio_readv = qemu_gluster_aio_readv,
+ .bdrv_aio_writev = qemu_gluster_aio_writev,
+ .bdrv_aio_flush = qemu_gluster_aio_flush,
+ .create_options = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_unix = {
+ .format_name = "gluster",
+ .protocol_name = "gluster+unix",
+ .instance_size = sizeof(BDRVGlusterState),
+ .bdrv_file_open = qemu_gluster_open,
+ .bdrv_close = qemu_gluster_close,
+ .bdrv_create = qemu_gluster_create,
+ .bdrv_getlength = qemu_gluster_getlength,
+ .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+ .bdrv_aio_readv = qemu_gluster_aio_readv,
+ .bdrv_aio_writev = qemu_gluster_aio_writev,
+ .bdrv_aio_flush = qemu_gluster_aio_flush,
+ .create_options = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_rdma = {
+ .format_name = "gluster",
+ .protocol_name = "gluster+rdma",
+ .instance_size = sizeof(BDRVGlusterState),
+ .bdrv_file_open = qemu_gluster_open,
+ .bdrv_close = qemu_gluster_close,
+ .bdrv_create = qemu_gluster_create,
+ .bdrv_getlength = qemu_gluster_getlength,
+ .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+ .bdrv_aio_readv = qemu_gluster_aio_readv,
+ .bdrv_aio_writev = qemu_gluster_aio_writev,
+ .bdrv_aio_flush = qemu_gluster_aio_flush,
+ .create_options = qemu_gluster_create_options,
+};
+
+static void bdrv_gluster_init(void)
+{
+ bdrv_register(&bdrv_gluster_rdma);
+ bdrv_register(&bdrv_gluster_unix);
+ bdrv_register(&bdrv_gluster_tcp);
+ bdrv_register(&bdrv_gluster);
+}
+
+block_init(bdrv_gluster_init);
diff --git a/block/stream.c b/block/stream.c
index c4f87dd5b6..792665276e 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -13,6 +13,7 @@
#include "trace.h"
#include "block_int.h"
+#include "blockjob.h"
#include "qemu/ratelimit.h"
enum {
@@ -30,6 +31,7 @@ typedef struct StreamBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *base;
+ BlockdevOnError on_error;
char backing_file_id[1024];
} StreamBlockJob;
@@ -77,6 +79,7 @@ static void coroutine_fn stream_run(void *opaque)
BlockDriverState *bs = s->common.bs;
BlockDriverState *base = s->base;
int64_t sector_num, end;
+ int error = 0;
int ret = 0;
int n = 0;
void *buf;
@@ -141,7 +144,19 @@ wait:
ret = stream_populate(bs, sector_num, n, buf);
}
if (ret < 0) {
- break;
+ BlockErrorAction action =
+ block_job_error_action(&s->common, s->common.bs, s->on_error,
+ true, -ret);
+ if (action == BDRV_ACTION_STOP) {
+ n = 0;
+ continue;
+ }
+ if (error == 0) {
+ error = ret;
+ }
+ if (action == BDRV_ACTION_REPORT) {
+ break;
+ }
}
ret = 0;
@@ -153,6 +168,9 @@ wait:
bdrv_disable_copy_on_read(bs);
}
+ /* Do not remove the backing file if an error was there but ignored. */
+ ret = error;
+
if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
const char *base_id = NULL, *base_fmt = NULL;
if (base) {
@@ -188,11 +206,19 @@ static BlockJobType stream_job_type = {
void stream_start(BlockDriverState *bs, BlockDriverState *base,
const char *base_id, int64_t speed,
+ BlockdevOnError on_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
{
StreamBlockJob *s;
+ if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
+ on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
+ !bdrv_iostatus_is_enabled(bs)) {
+ error_set(errp, QERR_INVALID_PARAMETER, "on-error");
+ return;
+ }
+
s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp);
if (!s) {
return;
@@ -203,6 +229,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id);
}
+ s->on_error = on_error;
s->common.co = qemu_coroutine_create(stream_run);
trace_stream_start(bs, base, s, s->common.co, opaque);
qemu_coroutine_enter(s->common.co, s);
diff --git a/block_int.h b/block_int.h
index ac4245cb18..f4bae04401 100644
--- a/block_int.h
+++ b/block_int.h
@@ -31,6 +31,7 @@
#include "qemu-timer.h"
#include "qapi-types.h"
#include "qerror.h"
+#include "monitor.h"
#define BLOCK_FLAG_ENCRYPT 1
#define BLOCK_FLAG_COMPAT6 4
@@ -67,73 +68,6 @@ typedef struct BlockIOBaseValue {
uint64_t ios[2];
} BlockIOBaseValue;
-typedef struct BlockJob BlockJob;
-
-/**
- * BlockJobType:
- *
- * A class type for block job objects.
- */
-typedef struct BlockJobType {
- /** Derived BlockJob struct size */
- size_t instance_size;
-
- /** String describing the operation, part of query-block-jobs QMP API */
- const char *job_type;
-
- /** Optional callback for job types that support setting a speed limit */
- void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
-} BlockJobType;
-
-/**
- * BlockJob:
- *
- * Long-running operation on a BlockDriverState.
- */
-struct BlockJob {
- /** The job type, including the job vtable. */
- const BlockJobType *job_type;
-
- /** The block device on which the job is operating. */
- BlockDriverState *bs;
-
- /**
- * The coroutine that executes the job. If not NULL, it is
- * reentered when busy is false and the job is cancelled.
- */
- Coroutine *co;
-
- /**
- * Set to true if the job should cancel itself. The flag must
- * always be tested just before toggling the busy flag from false
- * to true. After a job has been cancelled, it should only yield
- * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
- */
- bool cancelled;
-
- /**
- * Set to false by the job while it is in a quiescent state, where
- * no I/O is pending and the job has yielded on any condition
- * that is not detected by #qemu_aio_wait, such as a timer.
- */
- bool busy;
-
- /** Offset that is published by the query-block-jobs QMP API */
- int64_t offset;
-
- /** Length that is published by the query-block-jobs QMP API */
- int64_t len;
-
- /** Speed that was set with @block_job_set_speed. */
- int64_t speed;
-
- /** The completion function that will be called when the job completes. */
- BlockDriverCompletionFunc *cb;
-
- /** The opaque value that is passed to the completion function. */
- void *opaque;
-};
-
struct BlockDriver {
const char *format_name;
int instance_size;
@@ -329,7 +263,7 @@ struct BlockDriverState {
/* NOTE: the following infos are only hints for real hardware
drivers. They are not used by the block driver */
- BlockErrorAction on_read_error, on_write_error;
+ BlockdevOnError on_read_error, on_write_error;
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
char device_name[32];
@@ -353,92 +287,9 @@ void bdrv_set_io_limits(BlockDriverState *bs,
#ifdef _WIN32
int is_windows_drive(const char *filename);
#endif
-
-/**
- * block_job_create:
- * @job_type: The class object for the newly-created job.
- * @bs: The block
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- * Create a new long-running block device job and return it. The job
- * will call @cb asynchronously when the job completes. Note that
- * @bs may have been closed at the time the @cb it is called. If
- * this is the case, the job may be reported as either cancelled or
- * completed.
- *
- * This function is not part of the public job interface; it should be
- * called from a wrapper that is specific to the job type.
- */
-void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
- int64_t speed, BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp);
-
-/**
- * block_job_sleep_ns:
- * @job: The job that calls the function.
- * @clock: The clock to sleep on.
- * @ns: How many nanoseconds to stop for.
- *
- * Put the job to sleep (assuming that it wasn't canceled) for @ns
- * nanoseconds. Canceling the job will interrupt the wait immediately.
- */
-void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
-
-/**
- * block_job_complete:
- * @job: The job being completed.
- * @ret: The status code.
- *
- * Call the completion function that was registered at creation time, and
- * free @job.
- */
-void block_job_complete(BlockJob *job, int ret);
-
-/**
- * block_job_set_speed:
- * @job: The job to set the speed for.
- * @speed: The new value
- * @errp: Error object.
- *
- * Set a rate-limiting parameter for the job; the actual meaning may
- * vary depending on the job type.
- */
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
-
-/**
- * block_job_cancel:
- * @job: The job to be canceled.
- *
- * Asynchronously cancel the specified job.
- */
-void block_job_cancel(BlockJob *job);
-
-/**
- * block_job_is_cancelled:
- * @job: The job being queried.
- *
- * Returns whether the job is scheduled for cancellation.
- */
-bool block_job_is_cancelled(BlockJob *job);
-
-/**
- * block_job_cancel:
- * @job: The job to be canceled.
- *
- * Asynchronously cancel the job and wait for it to reach a quiescent
- * state. Note that the completion callback will still be called
- * asynchronously, hence it is *not* valid to call #bdrv_delete
- * immediately after #block_job_cancel_sync. Users of block jobs
- * will usually protect the BlockDriverState objects with a reference
- * count, should this be a concern.
- *
- * Returns the return value from the job if the job actually completed
- * during the call, or -ECANCELED if it was canceled.
- */
-int block_job_cancel_sync(BlockJob *job);
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+ enum MonitorEvent ev,
+ BlockErrorAction action, bool is_read);
/**
* stream_start:
@@ -448,6 +299,7 @@ int block_job_cancel_sync(BlockJob *job);
* @base_id: The file name that will be written to @bs as the new
* backing file if the job completes. Ignored if @base is %NULL.
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
* @cb: Completion function for the job.
* @opaque: Opaque pointer value passed to @cb.
* @errp: Error object.
@@ -459,8 +311,24 @@ int block_job_cancel_sync(BlockJob *job);
* @base_id in the written image and to @base in the live BlockDriverState.
*/
void stream_start(BlockDriverState *bs, BlockDriverState *base,
- const char *base_id, int64_t speed,
+ const char *base_id, int64_t speed, BlockdevOnError on_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp);
+/**
+ * commit_start:
+ * @bs: Top Block device
+ * @base: Block device that will be written into, and become the new top
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+ BlockDriverState *top, int64_t speed,
+ BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
#endif /* BLOCK_INT_H */
diff --git a/blockdev.c b/blockdev.c
index e5d450f0bb..5f18dfa97b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -9,6 +9,7 @@
#include "blockdev.h"
#include "hw/block-common.h"
+#include "blockjob.h"
#include "monitor.h"
#include "qerror.h"
#include "qemu-option.h"
@@ -237,16 +238,16 @@ static void drive_put_ref_bh_schedule(DriveInfo *dinfo)
qemu_bh_schedule(s->bh);
}
-static int parse_block_error_action(const char *buf, int is_read)
+static int parse_block_error_action(const char *buf, bool is_read)
{
if (!strcmp(buf, "ignore")) {
- return BLOCK_ERR_IGNORE;
+ return BLOCKDEV_ON_ERROR_IGNORE;
} else if (!is_read && !strcmp(buf, "enospc")) {
- return BLOCK_ERR_STOP_ENOSPC;
+ return BLOCKDEV_ON_ERROR_ENOSPC;
} else if (!strcmp(buf, "stop")) {
- return BLOCK_ERR_STOP_ANY;
+ return BLOCKDEV_ON_ERROR_STOP;
} else if (!strcmp(buf, "report")) {
- return BLOCK_ERR_REPORT;
+ return BLOCKDEV_ON_ERROR_REPORT;
} else {
error_report("'%s' invalid %s error action",
buf, is_read ? "read" : "write");
@@ -432,7 +433,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
return NULL;
}
- on_write_error = BLOCK_ERR_STOP_ENOSPC;
+ on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
error_report("werror is not supported by this bus type");
@@ -445,7 +446,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
}
}
- on_read_error = BLOCK_ERR_REPORT;
+ on_read_error = BLOCKDEV_ON_ERROR_REPORT;
if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) {
error_report("rerror is not supported by this bus type");
@@ -805,6 +806,11 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
/* This removes our old bs from the bdrv_states, and adds the new bs */
bdrv_append(states->new_bs, states->old_bs);
+ /* We don't need (or want) to use the transactional
+ * bdrv_reopen_multiple() across all the entries at once, because we
+ * don't want to abort all of them if one of them fails the reopen */
+ bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR,
+ NULL);
}
/* success */
@@ -1065,12 +1071,12 @@ static QObject *qobject_from_block_job(BlockJob *job)
job->speed);
}
-static void block_stream_cb(void *opaque, int ret)
+static void block_job_cb(void *opaque, int ret)
{
BlockDriverState *bs = opaque;
QObject *obj;
- trace_block_stream_cb(bs, bs->job, ret);
+ trace_block_job_cb(bs, bs->job, ret);
assert(bs->job);
obj = qobject_from_block_job(bs->job);
@@ -1090,13 +1096,18 @@ static void block_stream_cb(void *opaque, int ret)
}
void qmp_block_stream(const char *device, bool has_base,
- const char *base, bool has_speed,
- int64_t speed, Error **errp)
+ const char *base, bool has_speed, int64_t speed,
+ bool has_on_error, BlockdevOnError on_error,
+ Error **errp)
{
BlockDriverState *bs;
BlockDriverState *base_bs = NULL;
Error *local_err = NULL;
+ if (!has_on_error) {
+ on_error = BLOCKDEV_ON_ERROR_REPORT;
+ }
+
bs = bdrv_find(device);
if (!bs) {
error_set(errp, QERR_DEVICE_NOT_FOUND, device);
@@ -1112,7 +1123,7 @@ void qmp_block_stream(const char *device, bool has_base,
}
stream_start(bs, base_bs, base, has_speed ? speed : 0,
- block_stream_cb, bs, &local_err);
+ on_error, block_job_cb, bs, &local_err);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
return;
@@ -1126,6 +1137,64 @@ void qmp_block_stream(const char *device, bool has_base,
trace_qmp_block_stream(bs, bs->job);
}
+void qmp_block_commit(const char *device,
+ bool has_base, const char *base, const char *top,
+ bool has_speed, int64_t speed,
+ Error **errp)
+{
+ BlockDriverState *bs;
+ BlockDriverState *base_bs, *top_bs;
+ Error *local_err = NULL;
+ /* This will be part of the QMP command, if/when the
+ * BlockdevOnError change for blkmirror makes it in
+ */
+ BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;
+
+ /* drain all i/o before commits */
+ bdrv_drain_all();
+
+ bs = bdrv_find(device);
+ if (!bs) {
+ error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ return;
+ }
+ if (base && has_base) {
+ base_bs = bdrv_find_backing_image(bs, base);
+ } else {
+ base_bs = bdrv_find_base(bs);
+ }
+
+ if (base_bs == NULL) {
+ error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL");
+ return;
+ }
+
+ /* default top_bs is the active layer */
+ top_bs = bs;
+
+ if (top) {
+ if (strcmp(bs->filename, top) != 0) {
+ top_bs = bdrv_find_backing_image(bs, top);
+ }
+ }
+
+ if (top_bs == NULL) {
+ error_setg(errp, "Top image file %s not found", top ? top : "NULL");
+ return;
+ }
+
+ commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
+ &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ /* Grab a reference so hotplug does not delete the BlockDriverState from
+ * underneath us.
+ */
+ drive_get_ref(drive_get_by_blockdev(bs));
+}
+
static BlockJob *find_block_job(const char *device)
{
BlockDriverState *bs;
@@ -1142,19 +1211,28 @@ void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
BlockJob *job = find_block_job(device);
if (!job) {
- error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+ error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
block_job_set_speed(job, speed, errp);
}
-void qmp_block_job_cancel(const char *device, Error **errp)
+void qmp_block_job_cancel(const char *device,
+ bool has_force, bool force, Error **errp)
{
BlockJob *job = find_block_job(device);
+ if (!has_force) {
+ force = false;
+ }
+
if (!job) {
- error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+ error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+ return;
+ }
+ if (job->paused && !force) {
+ error_set(errp, QERR_BLOCK_JOB_PAUSED, device);
return;
}
@@ -1162,25 +1240,40 @@ void qmp_block_job_cancel(const char *device, Error **errp)
block_job_cancel(job);
}
+void qmp_block_job_pause(const char *device, Error **errp)
+{
+ BlockJob *job = find_block_job(device);
+
+ if (!job) {
+ error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+ return;
+ }
+
+ trace_qmp_block_job_pause(job);
+ block_job_pause(job);
+}
+
+void qmp_block_job_resume(const char *device, Error **errp)
+{
+ BlockJob *job = find_block_job(device);
+
+ if (!job) {
+ error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+ return;
+ }
+
+ trace_qmp_block_job_resume(job);
+ block_job_resume(job);
+}
+
static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
{
BlockJobInfoList **prev = opaque;
BlockJob *job = bs->job;
if (job) {
- BlockJobInfoList *elem;
- BlockJobInfo *info = g_new(BlockJobInfo, 1);
- *info = (BlockJobInfo){
- .type = g_strdup(job->job_type->job_type),
- .device = g_strdup(bdrv_get_device_name(bs)),
- .len = job->len,
- .offset = job->offset,
- .speed = job->speed,
- };
-
- elem = g_new0(BlockJobInfoList, 1);
- elem->value = info;
-
+ BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
+ elem->value = block_job_query(bs->job);
(*prev)->next = elem;
*prev = elem;
}
diff --git a/blockjob.c b/blockjob.c
new file mode 100644
index 0000000000..f55f55a193
--- /dev/null
+++ b/blockjob.c
@@ -0,0 +1,249 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "monitor.h"
+#include "block.h"
+#include "blockjob.h"
+#include "block_int.h"
+#include "qjson.h"
+#include "qemu-coroutine.h"
+#include "qmp-commands.h"
+#include "qemu-timer.h"
+
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+ int64_t speed, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp)
+{
+ BlockJob *job;
+
+ if (bs->job || bdrv_in_use(bs)) {
+ error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
+ return NULL;
+ }
+ bdrv_set_in_use(bs, 1);
+
+ job = g_malloc0(job_type->instance_size);
+ job->job_type = job_type;
+ job->bs = bs;
+ job->cb = cb;
+ job->opaque = opaque;
+ job->busy = true;
+ bs->job = job;
+
+ /* Only set speed when necessary to avoid NotSupported error */
+ if (speed != 0) {
+ Error *local_err = NULL;
+
+ block_job_set_speed(job, speed, &local_err);
+ if (error_is_set(&local_err)) {
+ bs->job = NULL;
+ g_free(job);
+ bdrv_set_in_use(bs, 0);
+ error_propagate(errp, local_err);
+ return NULL;
+ }
+ }
+ return job;
+}
+
+void block_job_complete(BlockJob *job, int ret)
+{
+ BlockDriverState *bs = job->bs;
+
+ assert(bs->job == job);
+ job->cb(job->opaque, ret);
+ bs->job = NULL;
+ g_free(job);
+ bdrv_set_in_use(bs, 0);
+}
+
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+ Error *local_err = NULL;
+
+ if (!job->job_type->set_speed) {
+ error_set(errp, QERR_NOT_SUPPORTED);
+ return;
+ }
+ job->job_type->set_speed(job, speed, &local_err);
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ job->speed = speed;
+}
+
+void block_job_pause(BlockJob *job)
+{
+ job->paused = true;
+}
+
+bool block_job_is_paused(BlockJob *job)
+{
+ return job->paused;
+}
+
+void block_job_resume(BlockJob *job)
+{
+ job->paused = false;
+ block_job_iostatus_reset(job);
+ if (job->co && !job->busy) {
+ qemu_coroutine_enter(job->co, NULL);
+ }
+}
+
+void block_job_cancel(BlockJob *job)
+{
+ job->cancelled = true;
+ block_job_resume(job);
+}
+
+bool block_job_is_cancelled(BlockJob *job)
+{
+ return job->cancelled;
+}
+
+void block_job_iostatus_reset(BlockJob *job)
+{
+ job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+}
+
+struct BlockCancelData {
+ BlockJob *job;
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+ bool cancelled;
+ int ret;
+};
+
+static void block_job_cancel_cb(void *opaque, int ret)
+{
+ struct BlockCancelData *data = opaque;
+
+ data->cancelled = block_job_is_cancelled(data->job);
+ data->ret = ret;
+ data->cb(data->opaque, ret);
+}
+
+int block_job_cancel_sync(BlockJob *job)
+{
+ struct BlockCancelData data;
+ BlockDriverState *bs = job->bs;
+
+ assert(bs->job == job);
+
+ /* Set up our own callback to store the result and chain to
+ * the original callback.
+ */
+ data.job = job;
+ data.cb = job->cb;
+ data.opaque = job->opaque;
+ data.ret = -EINPROGRESS;
+ job->cb = block_job_cancel_cb;
+ job->opaque = &data;
+ block_job_cancel(job);
+ while (data.ret == -EINPROGRESS) {
+ qemu_aio_wait();
+ }
+ return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
+}
+
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
+{
+ assert(job->busy);
+
+ /* Check cancellation *before* setting busy = false, too! */
+ if (block_job_is_cancelled(job)) {
+ return;
+ }
+
+ job->busy = false;
+ if (block_job_is_paused(job)) {
+ qemu_coroutine_yield();
+ } else {
+ co_sleep_ns(clock, ns);
+ }
+ job->busy = true;
+}
+
+BlockJobInfo *block_job_query(BlockJob *job)
+{
+ BlockJobInfo *info = g_new0(BlockJobInfo, 1);
+ info->type = g_strdup(job->job_type->job_type);
+ info->device = g_strdup(bdrv_get_device_name(job->bs));
+ info->len = job->len;
+ info->busy = job->busy;
+ info->paused = job->paused;
+ info->offset = job->offset;
+ info->speed = job->speed;
+ info->io_status = job->iostatus;
+ return info;
+}
+
+static void block_job_iostatus_set_err(BlockJob *job, int error)
+{
+ if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+ job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+ BLOCK_DEVICE_IO_STATUS_FAILED;
+ }
+}
+
+
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+ BlockdevOnError on_err,
+ int is_read, int error)
+{
+ BlockErrorAction action;
+
+ switch (on_err) {
+ case BLOCKDEV_ON_ERROR_ENOSPC:
+ action = (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
+ break;
+ case BLOCKDEV_ON_ERROR_STOP:
+ action = BDRV_ACTION_STOP;
+ break;
+ case BLOCKDEV_ON_ERROR_REPORT:
+ action = BDRV_ACTION_REPORT;
+ break;
+ case BLOCKDEV_ON_ERROR_IGNORE:
+ action = BDRV_ACTION_IGNORE;
+ break;
+ default:
+ abort();
+ }
+ bdrv_emit_qmp_error_event(job->bs, QEVENT_BLOCK_JOB_ERROR, action, is_read);
+ if (action == BDRV_ACTION_STOP) {
+ block_job_pause(job);
+ block_job_iostatus_set_err(job, error);
+ if (bs != job->bs) {
+ bdrv_iostatus_set_err(bs, error);
+ }
+ }
+ return action;
+}
diff --git a/blockjob.h b/blockjob.h
new file mode 100644
index 0000000000..930cc3c46a
--- /dev/null
+++ b/blockjob.h
@@ -0,0 +1,243 @@
+/*
+ * Declarations for long-running block device operations
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCKJOB_H
+#define BLOCKJOB_H 1
+
+#include "block.h"
+
+/**
+ * BlockJobType:
+ *
+ * A class type for block job objects.
+ */
+typedef struct BlockJobType {
+ /** Derived BlockJob struct size */
+ size_t instance_size;
+
+ /** String describing the operation, part of query-block-jobs QMP API */
+ const char *job_type;
+
+ /** Optional callback for job types that support setting a speed limit */
+ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
+} BlockJobType;
+
+/**
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
+ */
+struct BlockJob {
+ /** The job type, including the job vtable. */
+ const BlockJobType *job_type;
+
+ /** The block device on which the job is operating. */
+ BlockDriverState *bs;
+
+ /**
+ * The coroutine that executes the job. If not NULL, it is
+ * reentered when busy is false and the job is cancelled.
+ */
+ Coroutine *co;
+
+ /**
+ * Set to true if the job should cancel itself. The flag must
+ * always be tested just before toggling the busy flag from false
+ * to true. After a job has been cancelled, it should only yield
+ * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
+ */
+ bool cancelled;
+
+ /**
+ * Set to true if the job is either paused, or will pause itself
+ * as soon as possible (if busy == true).
+ */
+ bool paused;
+
+ /**
+ * Set to false by the job while it is in a quiescent state, where
+ * no I/O is pending and the job has yielded on any condition
+ * that is not detected by #qemu_aio_wait, such as a timer.
+ */
+ bool busy;
+
+ /** Status that is published by the query-block-jobs QMP API */
+ BlockDeviceIoStatus iostatus;
+
+ /** Offset that is published by the query-block-jobs QMP API */
+ int64_t offset;
+
+ /** Length that is published by the query-block-jobs QMP API */
+ int64_t len;
+
+ /** Speed that was set with @block_job_set_speed. */
+ int64_t speed;
+
+ /** The completion function that will be called when the job completes. */
+ BlockDriverCompletionFunc *cb;
+
+ /** The opaque value that is passed to the completion function. */
+ void *opaque;
+};
+
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Create a new long-running block device job and return it. The job
+ * will call @cb asynchronously when the job completes. Note that
+ * @bs may have been closed at the time the @cb it is called. If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+ int64_t speed, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/**
+ * block_job_sleep_ns:
+ * @job: The job that calls the function.
+ * @clock: The clock to sleep on.
+ * @ns: How many nanoseconds to stop for.
+ *
+ * Put the job to sleep (assuming that it wasn't canceled) for @ns
+ * nanoseconds. Canceling the job will interrupt the wait immediately.
+ */
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
+
+/**
+ * block_job_complete:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
+void block_job_complete(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value
+ * @errp: Error object.
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
+void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
+bool block_job_is_cancelled(BlockJob *job);
+
+/**
+ * block_job_query:
+ * @job: The job to get information about.
+ *
+ * Return information about a job.
+ */
+BlockJobInfo *block_job_query(BlockJob *job);
+
+/**
+ * block_job_pause:
+ * @job: The job to be paused.
+ *
+ * Asynchronously pause the specified job.
+ */
+void block_job_pause(BlockJob *job);
+
+/**
+ * block_job_resume:
+ * @job: The job to be resumed.
+ *
+ * Resume the specified job.
+ */
+void block_job_resume(BlockJob *job);
+
+/**
+ * block_job_is_paused:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is currently paused, or will pause
+ * as soon as it reaches a sleeping point.
+ */
+bool block_job_is_paused(BlockJob *job);
+
+/**
+ * block_job_cancel_sync:
+ * @job: The job to be canceled.
+ *
+ * Synchronously cancel the job. The completion callback is called
+ * before the function returns. The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
+ *
+ * Returns the return value from the job if the job actually completed
+ * during the call, or -ECANCELED if it was canceled.
+ */
+int block_job_cancel_sync(BlockJob *job);
+
+/**
+ * block_job_iostatus_reset:
+ * @job: The job whose I/O status should be reset.
+ *
+ * Reset I/O status on @job.
+ */
+void block_job_iostatus_reset(BlockJob *job);
+
+/**
+ * block_job_error_action:
+ * @job: The job to signal an error for.
+ * @bs: The block device on which to set an I/O error.
+ * @on_err: The error action setting.
+ * @is_read: Whether the operation was a read.
+ * @error: The error that was reported.
+ *
+ * Report an I/O error for a block job and possibly stop the VM. Return the
+ * action that was selected based on @on_err and @error.
+ */
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+ BlockdevOnError on_err,
+ int is_read, int error);
+#endif
diff --git a/configure b/configure
index c5e5bb2e81..e58846d5e2 100755
--- a/configure
+++ b/configure
@@ -219,6 +219,7 @@ want_tools="yes"
libiscsi=""
coroutine=""
seccomp=""
+glusterfs=""
# parse CC options first
for opt do
@@ -856,6 +857,10 @@ for opt do
;;
--disable-seccomp) seccomp="no"
;;
+ --disable-glusterfs) glusterfs="no"
+ ;;
+ --enable-glusterfs) glusterfs="yes"
+ ;;
*) echo "ERROR: unknown option $opt"; show_help="yes"
;;
esac
@@ -1128,6 +1133,8 @@ echo " --disable-seccomp disable seccomp support"
echo " --enable-seccomp enables seccomp support"
echo " --with-coroutine=BACKEND coroutine backend. Supported options:"
echo " gthread, ucontext, sigaltstack, windows"
+echo " --enable-glusterfs enable GlusterFS backend"
+echo " --disable-glusterfs disable GlusterFS backend"
echo ""
echo "NOTE: The object files are built at the place where configure is launched"
exit 1
@@ -2303,6 +2310,29 @@ EOF
fi
fi
+##########################################
+# glusterfs probe
+if test "$glusterfs" != "no" ; then
+ cat > $TMPC <<EOF
+#include <glusterfs/api/glfs.h>
+int main(void) {
+ (void) glfs_new("volume");
+ return 0;
+}
+EOF
+ glusterfs_libs="-lgfapi -lgfrpc -lgfxdr"
+ if compile_prog "" "$glusterfs_libs" ; then
+ glusterfs=yes
+ libs_tools="$glusterfs_libs $libs_tools"
+ libs_softmmu="$glusterfs_libs $libs_softmmu"
+ else
+ if test "$glusterfs" = "yes" ; then
+ feature_not_found "GlusterFS backend support"
+ fi
+ glusterfs=no
+ fi
+fi
+
#
# Check for xxxat() functions when we are building linux-user
# emulator. This is done because older glibc versions don't
@@ -3170,6 +3200,7 @@ echo "libiscsi support $libiscsi"
echo "build guest agent $guest_agent"
echo "seccomp support $seccomp"
echo "coroutine backend $coroutine_backend"
+echo "GlusterFS support $glusterfs"
if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -3516,6 +3547,10 @@ if test "$has_environ" = "yes" ; then
echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak
fi
+if test "$glusterfs" = "yes" ; then
+ echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
+fi
+
# USB host support
case "$usb" in
linux)
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 0302458df2..e0b537d0cc 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -99,9 +99,10 @@ ETEXI
{
.name = "block_job_cancel",
- .args_type = "device:B",
- .params = "device",
- .help = "stop an active background block operation",
+ .args_type = "force:-f,device:B",
+ .params = "[-f] device",
+ .help = "stop an active background block operation (use -f"
+ "\n\t\t\t if the operation is currently paused)",
.mhandler.cmd = hmp_block_job_cancel,
},
@@ -112,6 +113,34 @@ Stop an active block streaming operation.
ETEXI
{
+ .name = "block_job_pause",
+ .args_type = "device:B",
+ .params = "device",
+ .help = "pause an active background block operation",
+ .mhandler.cmd = hmp_block_job_pause,
+ },
+
+STEXI
+@item block_job_pause
+@findex block_job_pause
+Pause an active block streaming operation.
+ETEXI
+
+ {
+ .name = "block_job_resume",
+ .args_type = "device:B",
+ .params = "device",
+ .help = "resume a paused background block operation",
+ .mhandler.cmd = hmp_block_job_resume,
+ },
+
+STEXI
+@item block_job_resume
+@findex block_job_resume
+Resume a paused block streaming operation.
+ETEXI
+
+ {
.name = "eject",
.args_type = "force:-f,device:B",
.params = "[-f] device",
diff --git a/hmp.c b/hmp.c
index 3306bcdbb4..70bdec2433 100644
--- a/hmp.c
+++ b/hmp.c
@@ -930,7 +930,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
int64_t speed = qdict_get_try_int(qdict, "speed", 0);
qmp_block_stream(device, base != NULL, base,
- qdict_haskey(qdict, "speed"), speed, &error);
+ qdict_haskey(qdict, "speed"), speed,
+ BLOCKDEV_ON_ERROR_REPORT, true, &error);
hmp_handle_error(mon, &error);
}
@@ -950,8 +951,29 @@ void hmp_block_job_cancel(Monitor *mon, const QDict *qdict)
{
Error *error = NULL;
const char *device = qdict_get_str(qdict, "device");
+ bool force = qdict_get_try_bool(qdict, "force", 0);
- qmp_block_job_cancel(device, &error);
+ qmp_block_job_cancel(device, true, force, &error);
+
+ hmp_handle_error(mon, &error);
+}
+
+void hmp_block_job_pause(Monitor *mon, const QDict *qdict)
+{
+ Error *error = NULL;
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_block_job_pause(device, &error);
+
+ hmp_handle_error(mon, &error);
+}
+
+void hmp_block_job_resume(Monitor *mon, const QDict *qdict)
+{
+ Error *error = NULL;
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_block_job_resume(device, &error);
hmp_handle_error(mon, &error);
}
diff --git a/hmp.h b/hmp.h
index 48b9c59f8a..71ea384523 100644
--- a/hmp.h
+++ b/hmp.h
@@ -64,6 +64,8 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
void hmp_block_stream(Monitor *mon, const QDict *qdict);
void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
+void hmp_block_job_pause(Monitor *mon, const QDict *qdict);
+void hmp_block_job_resume(Monitor *mon, const QDict *qdict);
void hmp_migrate(Monitor *mon, const QDict *qdict);
void hmp_device_del(Monitor *mon, const QDict *qdict);
void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict);
diff --git a/hw/fdc.c b/hw/fdc.c
index 08830c1ba2..43b0f20503 100644
--- a/hw/fdc.c
+++ b/hw/fdc.c
@@ -1994,11 +1994,11 @@ static int fdctrl_connect_drives(FDCtrl *fdctrl)
drive->fdctrl = fdctrl;
if (drive->bs) {
- if (bdrv_get_on_error(drive->bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
+ if (bdrv_get_on_error(drive->bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) {
error_report("fdc doesn't support drive option werror");
return -1;
}
- if (bdrv_get_on_error(drive->bs, 1) != BLOCK_ERR_REPORT) {
+ if (bdrv_get_on_error(drive->bs, 1) != BLOCKDEV_ON_ERROR_REPORT) {
error_report("fdc doesn't support drive option rerror");
return -1;
}
diff --git a/hw/ide/core.c b/hw/ide/core.c
index d6fb69c634..d683a8cc84 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -556,32 +556,22 @@ void ide_dma_error(IDEState *s)
static int ide_handle_rw_error(IDEState *s, int error, int op)
{
- int is_read = (op & BM_STATUS_RETRY_READ);
- BlockErrorAction action = bdrv_get_on_error(s->bs, is_read);
+ bool is_read = (op & BM_STATUS_RETRY_READ) != 0;
+ BlockErrorAction action = bdrv_get_error_action(s->bs, is_read, error);
- if (action == BLOCK_ERR_IGNORE) {
- bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
- return 0;
- }
-
- if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
- || action == BLOCK_ERR_STOP_ANY) {
+ if (action == BDRV_ACTION_STOP) {
s->bus->dma->ops->set_unit(s->bus->dma, s->unit);
s->bus->error_status = op;
- bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read);
- vm_stop(RUN_STATE_IO_ERROR);
- bdrv_iostatus_set_err(s->bs, error);
- } else {
+ } else if (action == BDRV_ACTION_REPORT) {
if (op & BM_STATUS_DMA_RETRY) {
dma_buf_commit(s);
ide_dma_error(s);
} else {
ide_rw_error(s);
}
- bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read);
}
-
- return 1;
+ bdrv_error_action(s->bs, action, is_read, error);
+ return action != BDRV_ACTION_IGNORE;
}
void ide_dma_cb(void *opaque, int ret)
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index 88c0942e34..644533f777 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -188,7 +188,7 @@ static void bmdma_restart_bh(void *opaque)
{
BMDMAState *bm = opaque;
IDEBus *bus = bm->bus;
- int is_read;
+ bool is_read;
int error_status;
qemu_bh_delete(bm->bh);
@@ -198,7 +198,7 @@ static void bmdma_restart_bh(void *opaque)
return;
}
- is_read = !!(bus->error_status & BM_STATUS_RETRY_READ);
+ is_read = (bus->error_status & BM_STATUS_RETRY_READ) != 0;
/* The error status must be cleared before resubmitting the request: The
* request may fail again, and this case can only be distinguished if the
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 95e91585e6..99bb02ebf8 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -386,23 +386,11 @@ static void scsi_read_data(SCSIRequest *req)
*/
static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
{
- int is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
+ bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- BlockErrorAction action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
+ BlockErrorAction action = bdrv_get_error_action(s->qdev.conf.bs, is_read, error);
- if (action == BLOCK_ERR_IGNORE) {
- bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read);
- return 0;
- }
-
- if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
- || action == BLOCK_ERR_STOP_ANY) {
-
- bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read);
- vm_stop(RUN_STATE_IO_ERROR);
- bdrv_iostatus_set_err(s->qdev.conf.bs, error);
- scsi_req_retry(&r->req);
- } else {
+ if (action == BDRV_ACTION_REPORT) {
switch (error) {
case ENOMEDIUM:
scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
@@ -417,9 +405,12 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
scsi_check_condition(r, SENSE_CODE(IO_ERROR));
break;
}
- bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_REPORT, is_read);
}
- return 1;
+ bdrv_error_action(s->qdev.conf.bs, action, is_read, error);
+ if (action == BDRV_ACTION_STOP) {
+ scsi_req_retry(&r->req);
+ }
+ return action != BDRV_ACTION_IGNORE;
}
static void scsi_write_complete(void * opaque, int ret)
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index a5eb663ecf..d9045341ba 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -400,11 +400,11 @@ static int scsi_generic_initfn(SCSIDevice *s)
return -1;
}
- if (bdrv_get_on_error(s->conf.bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
+ if (bdrv_get_on_error(s->conf.bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) {
error_report("Device doesn't support drive option werror");
return -1;
}
- if (bdrv_get_on_error(s->conf.bs, 1) != BLOCK_ERR_REPORT) {
+ if (bdrv_get_on_error(s->conf.bs, 1) != BLOCKDEV_ON_ERROR_REPORT) {
error_report("Device doesn't support drive option rerror");
return -1;
}
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 6f6d172fd0..e25cc96477 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -64,31 +64,22 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
}
static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
- int is_read)
+ bool is_read)
{
- BlockErrorAction action = bdrv_get_on_error(req->dev->bs, is_read);
+ BlockErrorAction action = bdrv_get_error_action(req->dev->bs, is_read, error);
VirtIOBlock *s = req->dev;
- if (action == BLOCK_ERR_IGNORE) {
- bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
- return 0;
- }
-
- if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
- || action == BLOCK_ERR_STOP_ANY) {
+ if (action == BDRV_ACTION_STOP) {
req->next = s->rq;
s->rq = req;
- bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read);
- vm_stop(RUN_STATE_IO_ERROR);
- bdrv_iostatus_set_err(s->bs, error);
- } else {
+ } else if (action == BDRV_ACTION_REPORT) {
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
bdrv_acct_done(s->bs, &req->acct);
g_free(req);
- bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read);
}
- return 1;
+ bdrv_error_action(s->bs, action, is_read, error);
+ return action != BDRV_ACTION_IGNORE;
}
static void virtio_blk_rw_complete(void *opaque, int ret)
@@ -98,7 +89,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
trace_virtio_blk_rw_complete(req, ret);
if (ret) {
- int is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT);
+ bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT);
if (virtio_blk_handle_rw_error(req, -ret, is_read))
return;
}
diff --git a/monitor.c b/monitor.c
index cd735bbbeb..a0e3ffb924 100644
--- a/monitor.c
+++ b/monitor.c
@@ -450,6 +450,7 @@ static const char *monitor_event_names[] = {
[QEVENT_SPICE_DISCONNECTED] = "SPICE_DISCONNECTED",
[QEVENT_BLOCK_JOB_COMPLETED] = "BLOCK_JOB_COMPLETED",
[QEVENT_BLOCK_JOB_CANCELLED] = "BLOCK_JOB_CANCELLED",
+ [QEVENT_BLOCK_JOB_ERROR] = "BLOCK_JOB_ERROR",
[QEVENT_DEVICE_TRAY_MOVED] = "DEVICE_TRAY_MOVED",
[QEVENT_SUSPEND] = "SUSPEND",
[QEVENT_SUSPEND_DISK] = "SUSPEND_DISK",
diff --git a/monitor.h b/monitor.h
index e240c3f42d..b6e7d95a30 100644
--- a/monitor.h
+++ b/monitor.h
@@ -38,6 +38,7 @@ typedef enum MonitorEvent {
QEVENT_SPICE_DISCONNECTED,
QEVENT_BLOCK_JOB_COMPLETED,
QEVENT_BLOCK_JOB_CANCELLED,
+ QEVENT_BLOCK_JOB_ERROR,
QEVENT_DEVICE_TRAY_MOVED,
QEVENT_SUSPEND,
QEVENT_SUSPEND_DISK,
diff --git a/qapi-schema.json b/qapi-schema.json
index f4c21855cf..23abb94a1a 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1113,6 +1113,29 @@
{ 'command': 'query-pci', 'returns': ['PciInfo'] }
##
+# @BlockdevOnError:
+#
+# An enumeration of possible behaviors for errors on I/O operations.
+# The exact meaning depends on whether the I/O was initiated by a guest
+# or by a block job
+#
+# @report: for guest operations, report the error to the guest;
+# for jobs, cancel the job
+#
+# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
+# or BLOCK_JOB_ERROR)
+#
+# @enospc: same as @stop on ENOSPC, same as @report otherwise.
+#
+# @stop: for guest operations, stop the virtual machine;
+# for jobs, pause the job
+#
+# Since: 1.3
+##
+{ 'enum': 'BlockdevOnError',
+ 'data': ['report', 'ignore', 'enospc', 'stop'] }
+
+##
# @BlockJobInfo:
#
# Information about a long-running block device operation.
@@ -1123,15 +1146,24 @@
#
# @len: the maximum progress value
#
+# @busy: false if the job is known to be in a quiescent state, with
+# no pending I/O. Since 1.3.
+#
+# @paused: whether the job is paused or, if @busy is true, will
+# pause itself as soon as possible. Since 1.3.
+#
# @offset: the current progress value
#
# @speed: the rate limit, bytes per second
#
+# @io-status: the status of the job (since 1.3)
+#
# Since: 1.1
##
{ 'type': 'BlockJobInfo',
'data': {'type': 'str', 'device': 'str', 'len': 'int',
- 'offset': 'int', 'speed': 'int'} }
+ 'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
+ 'io-status': 'BlockDeviceIoStatus'} }
##
# @query-block-jobs:
@@ -1493,6 +1525,40 @@
'returns': 'str' }
##
+# @block-commit
+#
+# Live commit of data from overlay image nodes into backing nodes - i.e.,
+# writes data between 'top' and 'base' into 'base'.
+#
+# @device: the name of the device
+#
+# @base: #optional The file name of the backing image to write data into.
+# If not specified, this is the deepest backing image
+#
+# @top: The file name of the backing image within the image chain,
+# which contains the topmost data to be committed down.
+# Note, the active layer as 'top' is currently unsupported.
+#
+# If top == base, that is an error.
+#
+#
+# @speed: #optional the maximum speed, in bytes per second
+#
+# Returns: Nothing on success
+# If commit or stream is already active on this device, DeviceInUse
+# If @device does not exist, DeviceNotFound
+# If image commit is not supported by this device, NotSupported
+# If @base or @top is invalid, a generic error is returned
+# If @top is the active layer, or omitted, a generic error is returned
+# If @speed is invalid, InvalidParameter
+#
+# Since: 1.3
+#
+##
+{ 'command': 'block-commit',
+ 'data': { 'device': 'str', '*base': 'str', 'top': 'str',
+ '*speed': 'int' } }
+
# @migrate_cancel
#
# Cancel the current executing migration process.
@@ -1828,13 +1894,18 @@
#
# @speed: #optional the maximum speed, in bytes per second
#
+# @on-error: #optional the action to take on an error (default report).
+# 'stop' and 'enospc' can only be used if the block device
+# supports io-status (see BlockInfo). Since 1.3.
+#
# Returns: Nothing on success
# If @device does not exist, DeviceNotFound
#
# Since: 1.1
##
-{ 'command': 'block-stream', 'data': { 'device': 'str', '*base': 'str',
- '*speed': 'int' } }
+{ 'command': 'block-stream',
+ 'data': { 'device': 'str', '*base': 'str', '*speed': 'int',
+ '*on-error': 'BlockdevOnError' } }
##
# @block-job-set-speed:
@@ -1878,12 +1949,58 @@
#
# @device: the device name
#
+# @force: #optional whether to allow cancellation of a paused job (default
+# false). Since 1.3.
+#
# Returns: Nothing on success
# If no background operation is active on this device, DeviceNotActive
#
# Since: 1.1
##
-{ 'command': 'block-job-cancel', 'data': { 'device': 'str' } }
+{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
+
+##
+# @block-job-pause:
+#
+# Pause an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for pausing. It is an error to call this command if no
+# operation is in progress. Pausing an already paused job has no cumulative
+# effect; a single block-job-resume command will resume the job.
+#
+# The operation will pause as soon as possible. No event is emitted when
+# the operation is actually paused. Cancelling a paused job automatically
+# resumes it.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+# If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
+
+##
+# @block-job-resume:
+#
+# Resume an active background block operation.
+#
+# This command returns immediately after resuming a paused background block
+# operation. It is an error to call this command if no operation is in
+# progress. Resuming an already running job is not an error.
+#
+# This command also clears the error status of the job.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+# If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
##
# @ObjectTypeInfo:
diff --git a/qemu-tool.c b/qemu-tool.c
index 18205babab..f2f98138ce 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -19,6 +19,7 @@
#include "qemu-log.h"
#include "migration.h"
#include "main-loop.h"
+#include "sysemu.h"
#include "qemu_socket.h"
#include "slirp/libslirp.h"
@@ -37,6 +38,11 @@ const char *qemu_get_vm_name(void)
Monitor *cur_mon;
+void vm_stop(RunState state)
+{
+ abort();
+}
+
int monitor_cur_is_qmp(void)
{
return 0;
diff --git a/qerror.h b/qerror.h
index d0a76a4f71..c91708cc3c 100644
--- a/qerror.h
+++ b/qerror.h
@@ -48,6 +48,12 @@ void assert_no_error(Error *err);
#define QERR_BASE_NOT_FOUND \
ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found"
+#define QERR_BLOCK_JOB_NOT_ACTIVE \
+ ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'"
+
+#define QERR_BLOCK_JOB_PAUSED \
+ ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused"
+
#define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'"
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 36e08d9ffc..1aef27ca14 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -787,11 +787,17 @@ EQMP
{
.name = "block-stream",
- .args_type = "device:B,base:s?,speed:o?",
+ .args_type = "device:B,base:s?,speed:o?,on-error:s?",
.mhandler.cmd_new = qmp_marshal_input_block_stream,
},
{
+ .name = "block-commit",
+ .args_type = "device:B,base:s?,top:s,speed:o?",
+ .mhandler.cmd_new = qmp_marshal_input_block_commit,
+ },
+
+ {
.name = "block-job-set-speed",
.args_type = "device:B,speed:o",
.mhandler.cmd_new = qmp_marshal_input_block_job_set_speed,
@@ -799,10 +805,20 @@ EQMP
{
.name = "block-job-cancel",
- .args_type = "device:B",
+ .args_type = "device:B,force:b?",
.mhandler.cmd_new = qmp_marshal_input_block_job_cancel,
},
{
+ .name = "block-job-pause",
+ .args_type = "device:B",
+ .mhandler.cmd_new = qmp_marshal_input_block_job_pause,
+ },
+ {
+ .name = "block-job-resume",
+ .args_type = "device:B",
+ .mhandler.cmd_new = qmp_marshal_input_block_job_resume,
+ },
+ {
.name = "transaction",
.args_type = "actions:q",
.mhandler.cmd_new = qmp_marshal_input_transaction,
diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 55b16f81dd..dd4ef11996 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -18,6 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
+import time
import os
import iotests
from iotests import qemu_img, qemu_io
@@ -98,6 +99,43 @@ class TestSingleDrive(ImageStreamingTestCase):
qemu_io('-c', 'map', test_img),
'image file map does not match backing file after streaming')
+ def test_stream_pause(self):
+ self.assert_no_active_streams()
+
+ result = self.vm.qmp('block-stream', device='drive0')
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('block-job-pause', device='drive0')
+ self.assert_qmp(result, 'return', {})
+
+ time.sleep(1)
+ result = self.vm.qmp('query-block-jobs')
+ offset = self.dictpath(result, 'return[0]/offset')
+
+ time.sleep(1)
+ result = self.vm.qmp('query-block-jobs')
+ self.assert_qmp(result, 'return[0]/offset', offset)
+
+ result = self.vm.qmp('block-job-resume', device='drive0')
+ self.assert_qmp(result, 'return', {})
+
+ completed = False
+ while not completed:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_COMPLETED':
+ self.assert_qmp(event, 'data/type', 'stream')
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/offset', self.image_len)
+ self.assert_qmp(event, 'data/len', self.image_len)
+ completed = True
+
+ self.assert_no_active_streams()
+ self.vm.shutdown()
+
+ self.assertEqual(qemu_io('-c', 'map', backing_img),
+ qemu_io('-c', 'map', test_img),
+ 'image file map does not match backing file after streaming')
+
def test_stream_partial(self):
self.assert_no_active_streams()
@@ -157,6 +195,226 @@ class TestSmallerBackingFile(ImageStreamingTestCase):
self.assert_no_active_streams()
self.vm.shutdown()
+class TestErrors(ImageStreamingTestCase):
+ image_len = 2 * 1024 * 1024 # MB
+
+ # this should match STREAM_BUFFER_SIZE/512 in block/stream.c
+ STREAM_BUFFER_SIZE = 512 * 1024
+
+ def create_blkdebug_file(self, name, event, errno):
+ file = open(name, 'w')
+ file.write('''
+[inject-error]
+state = "1"
+event = "%s"
+errno = "%d"
+immediately = "off"
+once = "on"
+sector = "%d"
+
+[set-state]
+state = "1"
+event = "%s"
+new_state = "2"
+
+[set-state]
+state = "2"
+event = "%s"
+new_state = "1"
+''' % (event, errno, self.STREAM_BUFFER_SIZE / 512, event, event))
+ file.close()
+
+class TestEIO(TestErrors):
+ def setUp(self):
+ self.blkdebug_file = backing_img + ".blkdebug"
+ self.create_image(backing_img, TestErrors.image_len)
+ self.create_blkdebug_file(self.blkdebug_file, "read_aio", 5)
+ qemu_img('create', '-f', iotests.imgfmt,
+ '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw'
+ % (self.blkdebug_file, backing_img),
+ test_img)
+ self.vm = iotests.VM().add_drive(test_img)
+ self.vm.launch()
+
+ def tearDown(self):
+ self.vm.shutdown()
+ os.remove(test_img)
+ os.remove(backing_img)
+ os.remove(self.blkdebug_file)
+
+ def test_report(self):
+ self.assert_no_active_streams()
+
+ result = self.vm.qmp('block-stream', device='drive0')
+ self.assert_qmp(result, 'return', {})
+
+ completed = False
+ error = False
+ while not completed:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_ERROR':
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/operation', 'read')
+ error = True
+ elif event['event'] == 'BLOCK_JOB_COMPLETED':
+ self.assertTrue(error, 'job completed unexpectedly')
+ self.assert_qmp(event, 'data/type', 'stream')
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/error', 'Input/output error')
+ self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE)
+ self.assert_qmp(event, 'data/len', self.image_len)
+ completed = True
+
+ self.assert_no_active_streams()
+ self.vm.shutdown()
+
+ def test_ignore(self):
+ self.assert_no_active_streams()
+
+ result = self.vm.qmp('block-stream', device='drive0', on_error='ignore')
+ self.assert_qmp(result, 'return', {})
+
+ error = False
+ completed = False
+ while not completed:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_ERROR':
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/operation', 'read')
+ result = self.vm.qmp('query-block-jobs')
+ self.assert_qmp(result, 'return[0]/paused', False)
+ error = True
+ elif event['event'] == 'BLOCK_JOB_COMPLETED':
+ self.assertTrue(error, 'job completed unexpectedly')
+ self.assert_qmp(event, 'data/type', 'stream')
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/error', 'Input/output error')
+ self.assert_qmp(event, 'data/offset', self.image_len)
+ self.assert_qmp(event, 'data/len', self.image_len)
+ completed = True
+
+ self.assert_no_active_streams()
+ self.vm.shutdown()
+
+ def test_stop(self):
+ self.assert_no_active_streams()
+
+ result = self.vm.qmp('block-stream', device='drive0', on_error='stop')
+ self.assert_qmp(result, 'return', {})
+
+ error = False
+ completed = False
+ while not completed:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_ERROR':
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/operation', 'read')
+
+ result = self.vm.qmp('query-block-jobs')
+ self.assert_qmp(result, 'return[0]/paused', True)
+ self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE)
+ self.assert_qmp(result, 'return[0]/io-status', 'failed')
+
+ result = self.vm.qmp('block-job-resume', device='drive0')
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('query-block-jobs')
+ self.assert_qmp(result, 'return[0]/paused', False)
+ self.assert_qmp(result, 'return[0]/io-status', 'ok')
+ error = True
+ elif event['event'] == 'BLOCK_JOB_COMPLETED':
+ self.assertTrue(error, 'job completed unexpectedly')
+ self.assert_qmp(event, 'data/type', 'stream')
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp_absent(event, 'data/error')
+ self.assert_qmp(event, 'data/offset', self.image_len)
+ self.assert_qmp(event, 'data/len', self.image_len)
+ completed = True
+
+ self.assert_no_active_streams()
+ self.vm.shutdown()
+
+ def test_enospc(self):
+ self.assert_no_active_streams()
+
+ result = self.vm.qmp('block-stream', device='drive0', on_error='enospc')
+ self.assert_qmp(result, 'return', {})
+
+ completed = False
+ error = False
+ while not completed:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_ERROR':
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/operation', 'read')
+ error = True
+ elif event['event'] == 'BLOCK_JOB_COMPLETED':
+ self.assertTrue(error, 'job completed unexpectedly')
+ self.assert_qmp(event, 'data/type', 'stream')
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/error', 'Input/output error')
+ self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE)
+ self.assert_qmp(event, 'data/len', self.image_len)
+ completed = True
+
+ self.assert_no_active_streams()
+ self.vm.shutdown()
+
+class TestENOSPC(TestErrors):
+ def setUp(self):
+ self.blkdebug_file = backing_img + ".blkdebug"
+ self.create_image(backing_img, TestErrors.image_len)
+ self.create_blkdebug_file(self.blkdebug_file, "read_aio", 28)
+ qemu_img('create', '-f', iotests.imgfmt,
+ '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw'
+ % (self.blkdebug_file, backing_img),
+ test_img)
+ self.vm = iotests.VM().add_drive(test_img)
+ self.vm.launch()
+
+ def tearDown(self):
+ self.vm.shutdown()
+ os.remove(test_img)
+ os.remove(backing_img)
+ os.remove(self.blkdebug_file)
+
+ def test_enospc(self):
+ self.assert_no_active_streams()
+
+ result = self.vm.qmp('block-stream', device='drive0', on_error='enospc')
+ self.assert_qmp(result, 'return', {})
+
+ error = False
+ completed = False
+ while not completed:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_ERROR':
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/operation', 'read')
+
+ result = self.vm.qmp('query-block-jobs')
+ self.assert_qmp(result, 'return[0]/paused', True)
+ self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE)
+ self.assert_qmp(result, 'return[0]/io-status', 'nospace')
+
+ result = self.vm.qmp('block-job-resume', device='drive0')
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('query-block-jobs')
+ self.assert_qmp(result, 'return[0]/paused', False)
+ self.assert_qmp(result, 'return[0]/io-status', 'ok')
+ error = True
+ elif event['event'] == 'BLOCK_JOB_COMPLETED':
+ self.assertTrue(error, 'job completed unexpectedly')
+ self.assert_qmp(event, 'data/type', 'stream')
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp_absent(event, 'data/error')
+ self.assert_qmp(event, 'data/offset', self.image_len)
+ self.assert_qmp(event, 'data/len', self.image_len)
+ completed = True
+
+ self.assert_no_active_streams()
+ self.vm.shutdown()
class TestStreamStop(ImageStreamingTestCase):
image_len = 8 * 1024 * 1024 * 1024 # GB
@@ -173,8 +431,6 @@ class TestStreamStop(ImageStreamingTestCase):
os.remove(backing_img)
def test_stream_stop(self):
- import time
-
self.assert_no_active_streams()
result = self.vm.qmp('block-stream', device='drive0')
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 2f7d3902f2..fa16b5ccef 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-.......
+.............
----------------------------------------------------------------------
-Ran 7 tests
+Ran 13 tests
OK
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
new file mode 100755
index 0000000000..258e7eae38
--- /dev/null
+++ b/tests/qemu-iotests/040
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+#
+# Tests for image block commit.
+#
+# Copyright (C) 2012 IBM, Corp.
+# Copyright (C) 2012 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Test for live block commit
+# Derived from Image Streaming Test 030
+
+import time
+import os
+import iotests
+from iotests import qemu_img, qemu_io
+import struct
+
+backing_img = os.path.join(iotests.test_dir, 'backing.img')
+mid_img = os.path.join(iotests.test_dir, 'mid.img')
+test_img = os.path.join(iotests.test_dir, 'test.img')
+
+class ImageCommitTestCase(iotests.QMPTestCase):
+ '''Abstract base class for image commit test cases'''
+
+ def assert_no_active_commit(self):
+ result = self.vm.qmp('query-block-jobs')
+ self.assert_qmp(result, 'return', [])
+
+ def cancel_and_wait(self, drive='drive0'):
+ '''Cancel a block job and wait for it to finish'''
+ result = self.vm.qmp('block-job-cancel', device=drive)
+ self.assert_qmp(result, 'return', {})
+
+ cancelled = False
+ while not cancelled:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_CANCELLED':
+ self.assert_qmp(event, 'data/type', 'commit')
+ self.assert_qmp(event, 'data/device', drive)
+ cancelled = True
+
+ self.assert_no_active_commit()
+
+ def create_image(self, name, size):
+ file = open(name, 'w')
+ i = 0
+ while i < size:
+ sector = struct.pack('>l504xl', i / 512, i / 512)
+ file.write(sector)
+ i = i + 512
+ file.close()
+
+
+class TestSingleDrive(ImageCommitTestCase):
+ image_len = 1 * 1024 * 1024
+ test_len = 1 * 1024 * 256
+
+ def setUp(self):
+ self.create_image(backing_img, TestSingleDrive.image_len)
+ qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
+ qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
+ qemu_io('-c', 'write -P 0xab 0 524288', backing_img)
+ qemu_io('-c', 'write -P 0xef 524288 524288', mid_img)
+ self.vm = iotests.VM().add_drive(test_img)
+ self.vm.launch()
+
+ def tearDown(self):
+ self.vm.shutdown()
+ os.remove(test_img)
+ os.remove(mid_img)
+ os.remove(backing_img)
+
+ def test_commit(self):
+ self.assert_no_active_commit()
+ result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img)
+ self.assert_qmp(result, 'return', {})
+
+ completed = False
+ while not completed:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_COMPLETED':
+ self.assert_qmp(event, 'data/type', 'commit')
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/offset', self.image_len)
+ self.assert_qmp(event, 'data/len', self.image_len)
+ completed = True
+
+ self.assert_no_active_commit()
+ self.vm.shutdown()
+
+ self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
+ self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
+
+ def test_device_not_found(self):
+ result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % mid_img)
+ self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+ def test_top_same_base(self):
+ self.assert_no_active_commit()
+ result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % backing_img)
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', 'Invalid files for merge: top and base are the same')
+
+ def test_top_invalid(self):
+ self.assert_no_active_commit()
+ result = self.vm.qmp('block-commit', device='drive0', top='badfile', base='%s' % backing_img)
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', 'Top image file badfile not found')
+
+ def test_base_invalid(self):
+ self.assert_no_active_commit()
+ result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img, base='badfile')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found')
+
+ def test_top_is_active(self):
+ self.assert_no_active_commit()
+ result = self.vm.qmp('block-commit', device='drive0', top='%s' % test_img, base='%s' % backing_img)
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported')
+
+ def test_top_and_base_reversed(self):
+ self.assert_no_active_commit()
+ result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % mid_img)
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', 'Base (%(1)s) is not reachable from top (%(2)s)' % {"1" : mid_img, "2" : backing_img})
+
+ def test_top_omitted(self):
+ self.assert_no_active_commit()
+ result = self.vm.qmp('block-commit', device='drive0')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', "Parameter 'top' is missing")
+
+
+class TestSetSpeed(ImageCommitTestCase):
+ image_len = 80 * 1024 * 1024 # MB
+
+ def setUp(self):
+ qemu_img('create', backing_img, str(TestSetSpeed.image_len))
+ qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
+ qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
+ self.vm = iotests.VM().add_drive(test_img)
+ self.vm.launch()
+
+ def tearDown(self):
+ self.vm.shutdown()
+ os.remove(test_img)
+ os.remove(mid_img)
+ os.remove(backing_img)
+
+ def test_set_speed(self):
+ self.assert_no_active_commit()
+
+ result = self.vm.qmp('block-commit', device='drive0', top=mid_img, speed=1024 * 1024)
+ self.assert_qmp(result, 'return', {})
+
+ # Ensure the speed we set was accepted
+ result = self.vm.qmp('query-block-jobs')
+ self.assert_qmp(result, 'return[0]/device', 'drive0')
+ self.assert_qmp(result, 'return[0]/speed', 1024 * 1024)
+
+ self.cancel_and_wait()
+
+
+if __name__ == '__main__':
+ iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
new file mode 100644
index 0000000000..dae404e278
--- /dev/null
+++ b/tests/qemu-iotests/040.out
@@ -0,0 +1,5 @@
+.........
+----------------------------------------------------------------------
+Ran 9 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index ebb5ca4b41..66d2ba9689 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -36,7 +36,7 @@
027 rw auto quick
028 rw backing auto
029 rw auto quick
-030 rw auto
+030 rw auto backing
031 rw auto quick
032 rw auto
033 rw auto
@@ -46,3 +46,4 @@
037 rw auto backing
038 rw auto backing
039 rw auto
+040 rw auto
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index e05b1d640b..3c60b2d163 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -19,6 +19,7 @@
import os
import re
import subprocess
+import string
import unittest
import sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'QMP'))
import qmp
@@ -96,9 +97,14 @@ class VM(object):
os.remove(self._qemu_log_path)
self._popen = None
+ underscore_to_dash = string.maketrans('_', '-')
def qmp(self, cmd, **args):
'''Invoke a QMP command and return the result dict'''
- return self._qmp.cmd(cmd, args=args)
+ qmp_args = dict()
+ for k in args.keys():
+ qmp_args[k.translate(self.underscore_to_dash)] = args[k]
+
+ return self._qmp.cmd(cmd, args=qmp_args)
def get_qmp_events(self, wait=False):
'''Poll for queued QMP events and return a list of dicts'''
@@ -132,6 +138,13 @@ class QMPTestCase(unittest.TestCase):
self.fail('invalid index "%s" in path "%s" in "%s"' % (idx, path, str(d)))
return d
+ def assert_qmp_absent(self, d, path):
+ try:
+ result = self.dictpath(d, path)
+ except AssertionError:
+ return
+ self.fail('path "%s" has value "%s"' % (path, str(result)))
+
def assert_qmp(self, d, path, value):
'''Assert that the value for a specific path in a QMP dict matches'''
result = self.dictpath(d, path)
diff --git a/trace-events b/trace-events
index f5b5097552..42b66f19f4 100644
--- a/trace-events
+++ b/trace-events
@@ -74,10 +74,14 @@ bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t c
# block/stream.c
stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
+commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p"
# blockdev.c
qmp_block_job_cancel(void *job) "job %p"
-block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
+qmp_block_job_pause(void *job) "job %p"
+qmp_block_job_resume(void *job) "job %p"
+block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
qmp_block_stream(void *bs, void *job) "bs %p job %p"
# hw/virtio-blk.c
diff --git a/uri.c b/uri.c
new file mode 100644
index 0000000000..dd922de339
--- /dev/null
+++ b/uri.c
@@ -0,0 +1,2249 @@
+/**
+ * uri.c: set of generic URI related routines
+ *
+ * Reference: RFCs 3986, 2732 and 2373
+ *
+ * Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Daniel Veillard shall not
+ * be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization from him.
+ *
+ * daniel@veillard.com
+ *
+ **
+ *
+ * Copyright (C) 2007, 2009-2010 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors:
+ * Richard W.M. Jones <rjones@redhat.com>
+ *
+ */
+
+#include <glib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "uri.h"
+
+static void uri_clean(URI *uri);
+
+/*
+ * Old rule from 2396 used in legacy handling code
+ * alpha = lowalpha | upalpha
+ */
+#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
+
+
+/*
+ * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
+ * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
+ * "u" | "v" | "w" | "x" | "y" | "z"
+ */
+
+#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
+
+/*
+ * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
+ * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
+ * "U" | "V" | "W" | "X" | "Y" | "Z"
+ */
+#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
+
+#ifdef IS_DIGIT
+#undef IS_DIGIT
+#endif
+/*
+ * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+ */
+#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
+
+/*
+ * alphanum = alpha | digit
+ */
+
+#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
+
+/*
+ * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+ */
+
+#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
+ ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
+ ((x) == '(') || ((x) == ')'))
+
+/*
+ * unwise = "{" | "}" | "|" | "\" | "^" | "`"
+ */
+
+#define IS_UNWISE(p) \
+ (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
+ ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
+ ((*(p) == ']')) || ((*(p) == '`')))
+/*
+ * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
+ * "[" | "]"
+ */
+
+#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
+ ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
+ ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
+ ((x) == ']'))
+
+/*
+ * unreserved = alphanum | mark
+ */
+
+#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
+
+/*
+ * Skip to next pointer char, handle escaped sequences
+ */
+
+#define NEXT(p) ((*p == '%')? p += 3 : p++)
+
+/*
+ * Productions from the spec.
+ *
+ * authority = server | reg_name
+ * reg_name = 1*( unreserved | escaped | "$" | "," |
+ * ";" | ":" | "@" | "&" | "=" | "+" )
+ *
+ * path = [ abs_path | opaque_part ]
+ */
+
+
+/************************************************************************
+ * *
+ * RFC 3986 parser *
+ * *
+ ************************************************************************/
+
+#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
+#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \
+ ((*(p) >= 'A') && (*(p) <= 'Z')))
+#define ISA_HEXDIG(p) \
+ (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \
+ ((*(p) >= 'A') && (*(p) <= 'F')))
+
+/*
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ * / "*" / "+" / "," / ";" / "="
+ */
+#define ISA_SUB_DELIM(p) \
+ (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \
+ ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \
+ ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \
+ ((*(p) == '=')) || ((*(p) == '\'')))
+
+/*
+ * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ */
+#define ISA_GEN_DELIM(p) \
+ (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \
+ ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \
+ ((*(p) == '@')))
+
+/*
+ * reserved = gen-delims / sub-delims
+ */
+#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
+
+/*
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ */
+#define ISA_UNRESERVED(p) \
+ ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \
+ ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
+
+/*
+ * pct-encoded = "%" HEXDIG HEXDIG
+ */
+#define ISA_PCT_ENCODED(p) \
+ ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
+
+/*
+ * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+ */
+#define ISA_PCHAR(p) \
+ (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \
+ ((*(p) == ':')) || ((*(p) == '@')))
+
+/**
+ * rfc3986_parse_scheme:
+ * @uri: pointer to an URI structure
+ * @str: pointer to the string to analyze
+ *
+ * Parse an URI scheme
+ *
+ * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_scheme(URI *uri, const char **str) {
+ const char *cur;
+
+ if (str == NULL)
+ return(-1);
+
+ cur = *str;
+ if (!ISA_ALPHA(cur))
+ return(2);
+ cur++;
+ while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
+ (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
+ if (uri != NULL) {
+ if (uri->scheme != NULL) g_free(uri->scheme);
+ uri->scheme = g_strndup(*str, cur - *str);
+ }
+ *str = cur;
+ return(0);
+}
+
+/**
+ * rfc3986_parse_fragment:
+ * @uri: pointer to an URI structure
+ * @str: pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * fragment = *( pchar / "/" / "?" )
+ * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
+ * in the fragment identifier but this is used very broadly for
+ * xpointer scheme selection, so we are allowing it here to not break
+ * for example all the DocBook processing chains.
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_fragment(URI *uri, const char **str)
+{
+ const char *cur;
+
+ if (str == NULL)
+ return (-1);
+
+ cur = *str;
+
+ while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+ (*cur == '[') || (*cur == ']') ||
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+ NEXT(cur);
+ if (uri != NULL) {
+ if (uri->fragment != NULL)
+ g_free(uri->fragment);
+ if (uri->cleanup & 2)
+ uri->fragment = g_strndup(*str, cur - *str);
+ else
+ uri->fragment = uri_string_unescape(*str, cur - *str, NULL);
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_query:
+ * @uri: pointer to an URI structure
+ * @str: pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * query = *uric
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_query(URI *uri, const char **str)
+{
+ const char *cur;
+
+ if (str == NULL)
+ return (-1);
+
+ cur = *str;
+
+ while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+ NEXT(cur);
+ if (uri != NULL) {
+ if (uri->query != NULL)
+ g_free (uri->query);
+ uri->query = g_strndup (*str, cur - *str);
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_port:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse a port part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * port = *DIGIT
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_port(URI *uri, const char **str)
+{
+ const char *cur = *str;
+
+ if (ISA_DIGIT(cur)) {
+ if (uri != NULL)
+ uri->port = 0;
+ while (ISA_DIGIT(cur)) {
+ if (uri != NULL)
+ uri->port = uri->port * 10 + (*cur - '0');
+ cur++;
+ }
+ *str = cur;
+ return(0);
+ }
+ return(1);
+}
+
+/**
+ * rfc3986_parse_user_info:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an user informations part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_user_info(URI *uri, const char **str)
+{
+ const char *cur;
+
+ cur = *str;
+ while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
+ ISA_SUB_DELIM(cur) || (*cur == ':'))
+ NEXT(cur);
+ if (*cur == '@') {
+ if (uri != NULL) {
+ if (uri->user != NULL) g_free(uri->user);
+ if (uri->cleanup & 2)
+ uri->user = g_strndup(*str, cur - *str);
+ else
+ uri->user = uri_string_unescape(*str, cur - *str, NULL);
+ }
+ *str = cur;
+ return(0);
+ }
+ return(1);
+}
+
+/**
+ * rfc3986_parse_dec_octet:
+ * @str: the string to analyze
+ *
+ * dec-octet = DIGIT ; 0-9
+ * / %x31-39 DIGIT ; 10-99
+ * / "1" 2DIGIT ; 100-199
+ * / "2" %x30-34 DIGIT ; 200-249
+ * / "25" %x30-35 ; 250-255
+ *
+ * Skip a dec-octet.
+ *
+ * Returns 0 if found and skipped, 1 otherwise
+ */
+static int
+rfc3986_parse_dec_octet(const char **str) {
+ const char *cur = *str;
+
+ if (!(ISA_DIGIT(cur)))
+ return(1);
+ if (!ISA_DIGIT(cur+1))
+ cur++;
+ else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
+ cur += 2;
+ else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
+ cur += 3;
+ else if ((*cur == '2') && (*(cur + 1) >= '0') &&
+ (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
+ cur += 3;
+ else if ((*cur == '2') && (*(cur + 1) == '5') &&
+ (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
+ cur += 3;
+ else
+ return(1);
+ *str = cur;
+ return(0);
+}
+/**
+ * rfc3986_parse_host:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an host part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * host = IP-literal / IPv4address / reg-name
+ * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+ * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+ * reg-name = *( unreserved / pct-encoded / sub-delims )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_host(URI *uri, const char **str)
+{
+ const char *cur = *str;
+ const char *host;
+
+ host = cur;
+ /*
+ * IPv6 and future adressing scheme are enclosed between brackets
+ */
+ if (*cur == '[') {
+ cur++;
+ while ((*cur != ']') && (*cur != 0))
+ cur++;
+ if (*cur != ']')
+ return(1);
+ cur++;
+ goto found;
+ }
+ /*
+ * try to parse an IPv4
+ */
+ if (ISA_DIGIT(cur)) {
+ if (rfc3986_parse_dec_octet(&cur) != 0)
+ goto not_ipv4;
+ if (*cur != '.')
+ goto not_ipv4;
+ cur++;
+ if (rfc3986_parse_dec_octet(&cur) != 0)
+ goto not_ipv4;
+ if (*cur != '.')
+ goto not_ipv4;
+ if (rfc3986_parse_dec_octet(&cur) != 0)
+ goto not_ipv4;
+ if (*cur != '.')
+ goto not_ipv4;
+ if (rfc3986_parse_dec_octet(&cur) != 0)
+ goto not_ipv4;
+ goto found;
+not_ipv4:
+ cur = *str;
+ }
+ /*
+ * then this should be a hostname which can be empty
+ */
+ while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
+ NEXT(cur);
+found:
+ if (uri != NULL) {
+ if (uri->authority != NULL) g_free(uri->authority);
+ uri->authority = NULL;
+ if (uri->server != NULL) g_free(uri->server);
+ if (cur != host) {
+ if (uri->cleanup & 2)
+ uri->server = g_strndup(host, cur - host);
+ else
+ uri->server = uri_string_unescape(host, cur - host, NULL);
+ } else
+ uri->server = NULL;
+ }
+ *str = cur;
+ return(0);
+}
+
+/**
+ * rfc3986_parse_authority:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an authority part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * authority = [ userinfo "@" ] host [ ":" port ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_authority(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+ /*
+ * try to parse an userinfo and check for the trailing @
+ */
+ ret = rfc3986_parse_user_info(uri, &cur);
+ if ((ret != 0) || (*cur != '@'))
+ cur = *str;
+ else
+ cur++;
+ ret = rfc3986_parse_host(uri, &cur);
+ if (ret != 0) return(ret);
+ if (*cur == ':') {
+ cur++;
+ ret = rfc3986_parse_port(uri, &cur);
+ if (ret != 0) return(ret);
+ }
+ *str = cur;
+ return(0);
+}
+
+/**
+ * rfc3986_parse_segment:
+ * @str: the string to analyze
+ * @forbid: an optional forbidden character
+ * @empty: allow an empty segment
+ *
+ * Parse a segment and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * segment = *pchar
+ * segment-nz = 1*pchar
+ * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ * ; non-zero-length segment without any colon ":"
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_segment(const char **str, char forbid, int empty)
+{
+ const char *cur;
+
+ cur = *str;
+ if (!ISA_PCHAR(cur)) {
+ if (empty)
+ return(0);
+ return(1);
+ }
+ while (ISA_PCHAR(cur) && (*cur != forbid))
+ NEXT(cur);
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_path_ab_empty:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an path absolute or empty and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-abempty = *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_ab_empty(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+
+ while (*cur == '/') {
+ cur++;
+ ret = rfc3986_parse_segment(&cur, 0, 1);
+ if (ret != 0) return(ret);
+ }
+ if (uri != NULL) {
+ if (uri->path != NULL) g_free(uri->path);
+ if (*str != cur) {
+ if (uri->cleanup & 2)
+ uri->path = g_strndup(*str, cur - *str);
+ else
+ uri->path = uri_string_unescape(*str, cur - *str, NULL);
+ } else {
+ uri->path = NULL;
+ }
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_path_absolute:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an path absolute and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_absolute(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+
+ if (*cur != '/')
+ return(1);
+ cur++;
+ ret = rfc3986_parse_segment(&cur, 0, 0);
+ if (ret == 0) {
+ while (*cur == '/') {
+ cur++;
+ ret = rfc3986_parse_segment(&cur, 0, 1);
+ if (ret != 0) return(ret);
+ }
+ }
+ if (uri != NULL) {
+ if (uri->path != NULL) g_free(uri->path);
+ if (cur != *str) {
+ if (uri->cleanup & 2)
+ uri->path = g_strndup(*str, cur - *str);
+ else
+ uri->path = uri_string_unescape(*str, cur - *str, NULL);
+ } else {
+ uri->path = NULL;
+ }
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_path_rootless:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an path without root and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-rootless = segment-nz *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_rootless(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+
+ ret = rfc3986_parse_segment(&cur, 0, 0);
+ if (ret != 0) return(ret);
+ while (*cur == '/') {
+ cur++;
+ ret = rfc3986_parse_segment(&cur, 0, 1);
+ if (ret != 0) return(ret);
+ }
+ if (uri != NULL) {
+ if (uri->path != NULL) g_free(uri->path);
+ if (cur != *str) {
+ if (uri->cleanup & 2)
+ uri->path = g_strndup(*str, cur - *str);
+ else
+ uri->path = uri_string_unescape(*str, cur - *str, NULL);
+ } else {
+ uri->path = NULL;
+ }
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_path_no_scheme:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an path which is not a scheme and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-noscheme = segment-nz-nc *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_no_scheme(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+
+ ret = rfc3986_parse_segment(&cur, ':', 0);
+ if (ret != 0) return(ret);
+ while (*cur == '/') {
+ cur++;
+ ret = rfc3986_parse_segment(&cur, 0, 1);
+ if (ret != 0) return(ret);
+ }
+ if (uri != NULL) {
+ if (uri->path != NULL) g_free(uri->path);
+ if (cur != *str) {
+ if (uri->cleanup & 2)
+ uri->path = g_strndup(*str, cur - *str);
+ else
+ uri->path = uri_string_unescape(*str, cur - *str, NULL);
+ } else {
+ uri->path = NULL;
+ }
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_hier_part:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an hierarchical part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * hier-part = "//" authority path-abempty
+ * / path-absolute
+ * / path-rootless
+ * / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_hier_part(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+
+ if ((*cur == '/') && (*(cur + 1) == '/')) {
+ cur += 2;
+ ret = rfc3986_parse_authority(uri, &cur);
+ if (ret != 0) return(ret);
+ ret = rfc3986_parse_path_ab_empty(uri, &cur);
+ if (ret != 0) return(ret);
+ *str = cur;
+ return(0);
+ } else if (*cur == '/') {
+ ret = rfc3986_parse_path_absolute(uri, &cur);
+ if (ret != 0) return(ret);
+ } else if (ISA_PCHAR(cur)) {
+ ret = rfc3986_parse_path_rootless(uri, &cur);
+ if (ret != 0) return(ret);
+ } else {
+ /* path-empty is effectively empty */
+ if (uri != NULL) {
+ if (uri->path != NULL) g_free(uri->path);
+ uri->path = NULL;
+ }
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_relative_ref:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+ * relative-part = "//" authority path-abempty
+ * / path-absolute
+ * / path-noscheme
+ * / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_relative_ref(URI *uri, const char *str) {
+ int ret;
+
+ if ((*str == '/') && (*(str + 1) == '/')) {
+ str += 2;
+ ret = rfc3986_parse_authority(uri, &str);
+ if (ret != 0) return(ret);
+ ret = rfc3986_parse_path_ab_empty(uri, &str);
+ if (ret != 0) return(ret);
+ } else if (*str == '/') {
+ ret = rfc3986_parse_path_absolute(uri, &str);
+ if (ret != 0) return(ret);
+ } else if (ISA_PCHAR(str)) {
+ ret = rfc3986_parse_path_no_scheme(uri, &str);
+ if (ret != 0) return(ret);
+ } else {
+ /* path-empty is effectively empty */
+ if (uri != NULL) {
+ if (uri->path != NULL) g_free(uri->path);
+ uri->path = NULL;
+ }
+ }
+
+ if (*str == '?') {
+ str++;
+ ret = rfc3986_parse_query(uri, &str);
+ if (ret != 0) return(ret);
+ }
+ if (*str == '#') {
+ str++;
+ ret = rfc3986_parse_fragment(uri, &str);
+ if (ret != 0) return(ret);
+ }
+ if (*str != 0) {
+ uri_clean(uri);
+ return(1);
+ }
+ return(0);
+}
+
+
+/**
+ * rfc3986_parse:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse(URI *uri, const char *str) {
+ int ret;
+
+ ret = rfc3986_parse_scheme(uri, &str);
+ if (ret != 0) return(ret);
+ if (*str != ':') {
+ return(1);
+ }
+ str++;
+ ret = rfc3986_parse_hier_part(uri, &str);
+ if (ret != 0) return(ret);
+ if (*str == '?') {
+ str++;
+ ret = rfc3986_parse_query(uri, &str);
+ if (ret != 0) return(ret);
+ }
+ if (*str == '#') {
+ str++;
+ ret = rfc3986_parse_fragment(uri, &str);
+ if (ret != 0) return(ret);
+ }
+ if (*str != 0) {
+ uri_clean(uri);
+ return(1);
+ }
+ return(0);
+}
+
+/**
+ * rfc3986_parse_uri_reference:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an URI reference string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_uri_reference(URI *uri, const char *str) {
+ int ret;
+
+ if (str == NULL)
+ return(-1);
+ uri_clean(uri);
+
+ /*
+ * Try first to parse absolute refs, then fallback to relative if
+ * it fails.
+ */
+ ret = rfc3986_parse(uri, str);
+ if (ret != 0) {
+ uri_clean(uri);
+ ret = rfc3986_parse_relative_ref(uri, str);
+ if (ret != 0) {
+ uri_clean(uri);
+ return(ret);
+ }
+ }
+ return(0);
+}
+
+/**
+ * uri_parse:
+ * @str: the URI string to analyze
+ *
+ * Parse an URI based on RFC 3986
+ *
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse(const char *str) {
+ URI *uri;
+ int ret;
+
+ if (str == NULL)
+ return(NULL);
+ uri = uri_new();
+ if (uri != NULL) {
+ ret = rfc3986_parse_uri_reference(uri, str);
+ if (ret) {
+ uri_free(uri);
+ return(NULL);
+ }
+ }
+ return(uri);
+}
+
+/**
+ * uri_parse_into:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an URI reference string based on RFC 3986 and fills in the
+ * appropriate fields of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+int
+uri_parse_into(URI *uri, const char *str) {
+ return(rfc3986_parse_uri_reference(uri, str));
+}
+
+/**
+ * uri_parse_raw:
+ * @str: the URI string to analyze
+ * @raw: if 1 unescaping of URI pieces are disabled
+ *
+ * Parse an URI but allows to keep intact the original fragments.
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse_raw(const char *str, int raw) {
+ URI *uri;
+ int ret;
+
+ if (str == NULL)
+ return(NULL);
+ uri = uri_new();
+ if (uri != NULL) {
+ if (raw) {
+ uri->cleanup |= 2;
+ }
+ ret = uri_parse_into(uri, str);
+ if (ret) {
+ uri_free(uri);
+ return(NULL);
+ }
+ }
+ return(uri);
+}
+
+/************************************************************************
+ * *
+ * Generic URI structure functions *
+ * *
+ ************************************************************************/
+
+/**
+ * uri_new:
+ *
+ * Simply creates an empty URI
+ *
+ * Returns the new structure or NULL in case of error
+ */
+URI *
+uri_new(void) {
+ URI *ret;
+
+ ret = (URI *) g_malloc(sizeof(URI));
+ memset(ret, 0, sizeof(URI));
+ return(ret);
+}
+
+/**
+ * realloc2n:
+ *
+ * Function to handle properly a reallocation when saving an URI
+ * Also imposes some limit on the length of an URI string output
+ */
+static char *
+realloc2n(char *ret, int *max) {
+ char *temp;
+ int tmp;
+
+ tmp = *max * 2;
+ temp = g_realloc(ret, (tmp + 1));
+ *max = tmp;
+ return(temp);
+}
+
+/**
+ * uri_to_string:
+ * @uri: pointer to an URI
+ *
+ * Save the URI as an escaped string
+ *
+ * Returns a new string (to be deallocated by caller)
+ */
+char *
+uri_to_string(URI *uri) {
+ char *ret = NULL;
+ char *temp;
+ const char *p;
+ int len;
+ int max;
+
+ if (uri == NULL) return(NULL);
+
+
+ max = 80;
+ ret = g_malloc(max + 1);
+ len = 0;
+
+ if (uri->scheme != NULL) {
+ p = uri->scheme;
+ while (*p != 0) {
+ if (len >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len++] = *p++;
+ }
+ if (len >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len++] = ':';
+ }
+ if (uri->opaque != NULL) {
+ p = uri->opaque;
+ while (*p != 0) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
+ ret[len++] = *p++;
+ else {
+ int val = *(unsigned char *)p++;
+ int hi = val / 0x10, lo = val % 0x10;
+ ret[len++] = '%';
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+ }
+ }
+ } else {
+ if (uri->server != NULL) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len++] = '/';
+ ret[len++] = '/';
+ if (uri->user != NULL) {
+ p = uri->user;
+ while (*p != 0) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ if ((IS_UNRESERVED(*(p))) ||
+ ((*(p) == ';')) || ((*(p) == ':')) ||
+ ((*(p) == '&')) || ((*(p) == '=')) ||
+ ((*(p) == '+')) || ((*(p) == '$')) ||
+ ((*(p) == ',')))
+ ret[len++] = *p++;
+ else {
+ int val = *(unsigned char *)p++;
+ int hi = val / 0x10, lo = val % 0x10;
+ ret[len++] = '%';
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+ }
+ }
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len++] = '@';
+ }
+ p = uri->server;
+ while (*p != 0) {
+ if (len >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len++] = *p++;
+ }
+ if (uri->port > 0) {
+ if (len + 10 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ len += snprintf(&ret[len], max - len, ":%d", uri->port);
+ }
+ } else if (uri->authority != NULL) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len++] = '/';
+ ret[len++] = '/';
+ p = uri->authority;
+ while (*p != 0) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ if ((IS_UNRESERVED(*(p))) ||
+ ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
+ ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+ ((*(p) == '=')) || ((*(p) == '+')))
+ ret[len++] = *p++;
+ else {
+ int val = *(unsigned char *)p++;
+ int hi = val / 0x10, lo = val % 0x10;
+ ret[len++] = '%';
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+ }
+ }
+ } else if (uri->scheme != NULL) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len++] = '/';
+ ret[len++] = '/';
+ }
+ if (uri->path != NULL) {
+ p = uri->path;
+ /*
+ * the colon in file:///d: should not be escaped or
+ * Windows accesses fail later.
+ */
+ if ((uri->scheme != NULL) &&
+ (p[0] == '/') &&
+ (((p[1] >= 'a') && (p[1] <= 'z')) ||
+ ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
+ (p[2] == ':') &&
+ (!strcmp(uri->scheme, "file"))) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len++] = *p++;
+ ret[len++] = *p++;
+ ret[len++] = *p++;
+ }
+ while (*p != 0) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
+ ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+ ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
+ ((*(p) == ',')))
+ ret[len++] = *p++;
+ else {
+ int val = *(unsigned char *)p++;
+ int hi = val / 0x10, lo = val % 0x10;
+ ret[len++] = '%';
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+ }
+ }
+ }
+ if (uri->query != NULL) {
+ if (len + 1 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len++] = '?';
+ p = uri->query;
+ while (*p != 0) {
+ if (len + 1 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len++] = *p++;
+ }
+ }
+ }
+ if (uri->fragment != NULL) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len++] = '#';
+ p = uri->fragment;
+ while (*p != 0) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
+ ret[len++] = *p++;
+ else {
+ int val = *(unsigned char *)p++;
+ int hi = val / 0x10, lo = val % 0x10;
+ ret[len++] = '%';
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+ }
+ }
+ }
+ if (len >= max) {
+ temp = realloc2n(ret, &max);
+ if (temp == NULL) goto mem_error;
+ ret = temp;
+ }
+ ret[len] = 0;
+ return(ret);
+
+mem_error:
+ g_free(ret);
+ return(NULL);
+}
+
+/**
+ * uri_clean:
+ * @uri: pointer to an URI
+ *
+ * Make sure the URI struct is free of content
+ */
+static void
+uri_clean(URI *uri) {
+ if (uri == NULL) return;
+
+ if (uri->scheme != NULL) g_free(uri->scheme);
+ uri->scheme = NULL;
+ if (uri->server != NULL) g_free(uri->server);
+ uri->server = NULL;
+ if (uri->user != NULL) g_free(uri->user);
+ uri->user = NULL;
+ if (uri->path != NULL) g_free(uri->path);
+ uri->path = NULL;
+ if (uri->fragment != NULL) g_free(uri->fragment);
+ uri->fragment = NULL;
+ if (uri->opaque != NULL) g_free(uri->opaque);
+ uri->opaque = NULL;
+ if (uri->authority != NULL) g_free(uri->authority);
+ uri->authority = NULL;
+ if (uri->query != NULL) g_free(uri->query);
+ uri->query = NULL;
+}
+
+/**
+ * uri_free:
+ * @uri: pointer to an URI
+ *
+ * Free up the URI struct
+ */
+void
+uri_free(URI *uri) {
+ uri_clean(uri);
+ g_free(uri);
+}
+
+/************************************************************************
+ * *
+ * Helper functions *
+ * *
+ ************************************************************************/
+
+/**
+ * normalize_uri_path:
+ * @path: pointer to the path string
+ *
+ * Applies the 5 normalization steps to a path string--that is, RFC 2396
+ * Section 5.2, steps 6.c through 6.g.
+ *
+ * Normalization occurs directly on the string, no new allocation is done
+ *
+ * Returns 0 or an error code
+ */
+static int
+normalize_uri_path(char *path) {
+ char *cur, *out;
+
+ if (path == NULL)
+ return(-1);
+
+ /* Skip all initial "/" chars. We want to get to the beginning of the
+ * first non-empty segment.
+ */
+ cur = path;
+ while (cur[0] == '/')
+ ++cur;
+ if (cur[0] == '\0')
+ return(0);
+
+ /* Keep everything we've seen so far. */
+ out = cur;
+
+ /*
+ * Analyze each segment in sequence for cases (c) and (d).
+ */
+ while (cur[0] != '\0') {
+ /*
+ * c) All occurrences of "./", where "." is a complete path segment,
+ * are removed from the buffer string.
+ */
+ if ((cur[0] == '.') && (cur[1] == '/')) {
+ cur += 2;
+ /* '//' normalization should be done at this point too */
+ while (cur[0] == '/')
+ cur++;
+ continue;
+ }
+
+ /*
+ * d) If the buffer string ends with "." as a complete path segment,
+ * that "." is removed.
+ */
+ if ((cur[0] == '.') && (cur[1] == '\0'))
+ break;
+
+ /* Otherwise keep the segment. */
+ while (cur[0] != '/') {
+ if (cur[0] == '\0')
+ goto done_cd;
+ (out++)[0] = (cur++)[0];
+ }
+ /* nomalize // */
+ while ((cur[0] == '/') && (cur[1] == '/'))
+ cur++;
+
+ (out++)[0] = (cur++)[0];
+ }
+ done_cd:
+ out[0] = '\0';
+
+ /* Reset to the beginning of the first segment for the next sequence. */
+ cur = path;
+ while (cur[0] == '/')
+ ++cur;
+ if (cur[0] == '\0')
+ return(0);
+
+ /*
+ * Analyze each segment in sequence for cases (e) and (f).
+ *
+ * e) All occurrences of "<segment>/../", where <segment> is a
+ * complete path segment not equal to "..", are removed from the
+ * buffer string. Removal of these path segments is performed
+ * iteratively, removing the leftmost matching pattern on each
+ * iteration, until no matching pattern remains.
+ *
+ * f) If the buffer string ends with "<segment>/..", where <segment>
+ * is a complete path segment not equal to "..", that
+ * "<segment>/.." is removed.
+ *
+ * To satisfy the "iterative" clause in (e), we need to collapse the
+ * string every time we find something that needs to be removed. Thus,
+ * we don't need to keep two pointers into the string: we only need a
+ * "current position" pointer.
+ */
+ while (1) {
+ char *segp, *tmp;
+
+ /* At the beginning of each iteration of this loop, "cur" points to
+ * the first character of the segment we want to examine.
+ */
+
+ /* Find the end of the current segment. */
+ segp = cur;
+ while ((segp[0] != '/') && (segp[0] != '\0'))
+ ++segp;
+
+ /* If this is the last segment, we're done (we need at least two
+ * segments to meet the criteria for the (e) and (f) cases).
+ */
+ if (segp[0] == '\0')
+ break;
+
+ /* If the first segment is "..", or if the next segment _isn't_ "..",
+ * keep this segment and try the next one.
+ */
+ ++segp;
+ if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
+ || ((segp[0] != '.') || (segp[1] != '.')
+ || ((segp[2] != '/') && (segp[2] != '\0')))) {
+ cur = segp;
+ continue;
+ }
+
+ /* If we get here, remove this segment and the next one and back up
+ * to the previous segment (if there is one), to implement the
+ * "iteratively" clause. It's pretty much impossible to back up
+ * while maintaining two pointers into the buffer, so just compact
+ * the whole buffer now.
+ */
+
+ /* If this is the end of the buffer, we're done. */
+ if (segp[2] == '\0') {
+ cur[0] = '\0';
+ break;
+ }
+ /* Valgrind complained, strcpy(cur, segp + 3); */
+ /* string will overlap, do not use strcpy */
+ tmp = cur;
+ segp += 3;
+ while ((*tmp++ = *segp++) != 0)
+ ;
+
+ /* If there are no previous segments, then keep going from here. */
+ segp = cur;
+ while ((segp > path) && ((--segp)[0] == '/'))
+ ;
+ if (segp == path)
+ continue;
+
+ /* "segp" is pointing to the end of a previous segment; find it's
+ * start. We need to back up to the previous segment and start
+ * over with that to handle things like "foo/bar/../..". If we
+ * don't do this, then on the first pass we'll remove the "bar/..",
+ * but be pointing at the second ".." so we won't realize we can also
+ * remove the "foo/..".
+ */
+ cur = segp;
+ while ((cur > path) && (cur[-1] != '/'))
+ --cur;
+ }
+ out[0] = '\0';
+
+ /*
+ * g) If the resulting buffer string still begins with one or more
+ * complete path segments of "..", then the reference is
+ * considered to be in error. Implementations may handle this
+ * error by retaining these components in the resolved path (i.e.,
+ * treating them as part of the final URI), by removing them from
+ * the resolved path (i.e., discarding relative levels above the
+ * root), or by avoiding traversal of the reference.
+ *
+ * We discard them from the final path.
+ */
+ if (path[0] == '/') {
+ cur = path;
+ while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
+ && ((cur[3] == '/') || (cur[3] == '\0')))
+ cur += 3;
+
+ if (cur != path) {
+ out = path;
+ while (cur[0] != '\0')
+ (out++)[0] = (cur++)[0];
+ out[0] = 0;
+ }
+ }
+
+ return(0);
+}
+
+static int is_hex(char c) {
+ if (((c >= '0') && (c <= '9')) ||
+ ((c >= 'a') && (c <= 'f')) ||
+ ((c >= 'A') && (c <= 'F')))
+ return(1);
+ return(0);
+}
+
+
+/**
+ * uri_string_unescape:
+ * @str: the string to unescape
+ * @len: the length in bytes to unescape (or <= 0 to indicate full string)
+ * @target: optional destination buffer
+ *
+ * Unescaping routine, but does not check that the string is an URI. The
+ * output is a direct unsigned char translation of %XX values (no encoding)
+ * Note that the length of the result can only be smaller or same size as
+ * the input string.
+ *
+ * Returns a copy of the string, but unescaped, will return NULL only in case
+ * of error
+ */
+char *
+uri_string_unescape(const char *str, int len, char *target) {
+ char *ret, *out;
+ const char *in;
+
+ if (str == NULL)
+ return(NULL);
+ if (len <= 0) len = strlen(str);
+ if (len < 0) return(NULL);
+
+ if (target == NULL) {
+ ret = g_malloc(len + 1);
+ } else
+ ret = target;
+ in = str;
+ out = ret;
+ while(len > 0) {
+ if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
+ in++;
+ if ((*in >= '0') && (*in <= '9'))
+ *out = (*in - '0');
+ else if ((*in >= 'a') && (*in <= 'f'))
+ *out = (*in - 'a') + 10;
+ else if ((*in >= 'A') && (*in <= 'F'))
+ *out = (*in - 'A') + 10;
+ in++;
+ if ((*in >= '0') && (*in <= '9'))
+ *out = *out * 16 + (*in - '0');
+ else if ((*in >= 'a') && (*in <= 'f'))
+ *out = *out * 16 + (*in - 'a') + 10;
+ else if ((*in >= 'A') && (*in <= 'F'))
+ *out = *out * 16 + (*in - 'A') + 10;
+ in++;
+ len -= 3;
+ out++;
+ } else {
+ *out++ = *in++;
+ len--;
+ }
+ }
+ *out = 0;
+ return(ret);
+}
+
+/**
+ * uri_string_escape:
+ * @str: string to escape
+ * @list: exception list string of chars not to escape
+ *
+ * This routine escapes a string to hex, ignoring reserved characters (a-z)
+ * and the characters in the exception list.
+ *
+ * Returns a new escaped string or NULL in case of error.
+ */
+char *
+uri_string_escape(const char *str, const char *list) {
+ char *ret, ch;
+ char *temp;
+ const char *in;
+ int len, out;
+
+ if (str == NULL)
+ return(NULL);
+ if (str[0] == 0)
+ return(g_strdup(str));
+ len = strlen(str);
+ if (!(len > 0)) return(NULL);
+
+ len += 20;
+ ret = g_malloc(len);
+ in = str;
+ out = 0;
+ while(*in != 0) {
+ if (len - out <= 3) {
+ temp = realloc2n(ret, &len);
+ ret = temp;
+ }
+
+ ch = *in;
+
+ if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) {
+ unsigned char val;
+ ret[out++] = '%';
+ val = ch >> 4;
+ if (val <= 9)
+ ret[out++] = '0' + val;
+ else
+ ret[out++] = 'A' + val - 0xA;
+ val = ch & 0xF;
+ if (val <= 9)
+ ret[out++] = '0' + val;
+ else
+ ret[out++] = 'A' + val - 0xA;
+ in++;
+ } else {
+ ret[out++] = *in++;
+ }
+
+ }
+ ret[out] = 0;
+ return(ret);
+}
+
+/************************************************************************
+ * *
+ * Public functions *
+ * *
+ ************************************************************************/
+
+/**
+ * uri_resolve:
+ * @URI: the URI instance found in the document
+ * @base: the base value
+ *
+ * Computes he final URI of the reference done by checking that
+ * the given URI is valid, and building the final URI using the
+ * base URI. This is processed according to section 5.2 of the
+ * RFC 2396
+ *
+ * 5.2. Resolving Relative References to Absolute Form
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ * of error.
+ */
+char *
+uri_resolve(const char *uri, const char *base) {
+ char *val = NULL;
+ int ret, len, indx, cur, out;
+ URI *ref = NULL;
+ URI *bas = NULL;
+ URI *res = NULL;
+
+ /*
+ * 1) The URI reference is parsed into the potential four components and
+ * fragment identifier, as described in Section 4.3.
+ *
+ * NOTE that a completely empty URI is treated by modern browsers
+ * as a reference to "." rather than as a synonym for the current
+ * URI. Should we do that here?
+ */
+ if (uri == NULL)
+ ret = -1;
+ else {
+ if (*uri) {
+ ref = uri_new();
+ if (ref == NULL)
+ goto done;
+ ret = uri_parse_into(ref, uri);
+ }
+ else
+ ret = 0;
+ }
+ if (ret != 0)
+ goto done;
+ if ((ref != NULL) && (ref->scheme != NULL)) {
+ /*
+ * The URI is absolute don't modify.
+ */
+ val = g_strdup(uri);
+ goto done;
+ }
+ if (base == NULL)
+ ret = -1;
+ else {
+ bas = uri_new();
+ if (bas == NULL)
+ goto done;
+ ret = uri_parse_into(bas, base);
+ }
+ if (ret != 0) {
+ if (ref)
+ val = uri_to_string(ref);
+ goto done;
+ }
+ if (ref == NULL) {
+ /*
+ * the base fragment must be ignored
+ */
+ if (bas->fragment != NULL) {
+ g_free(bas->fragment);
+ bas->fragment = NULL;
+ }
+ val = uri_to_string(bas);
+ goto done;
+ }
+
+ /*
+ * 2) If the path component is empty and the scheme, authority, and
+ * query components are undefined, then it is a reference to the
+ * current document and we are done. Otherwise, the reference URI's
+ * query and fragment components are defined as found (or not found)
+ * within the URI reference and not inherited from the base URI.
+ *
+ * NOTE that in modern browsers, the parsing differs from the above
+ * in the following aspect: the query component is allowed to be
+ * defined while still treating this as a reference to the current
+ * document.
+ */
+ res = uri_new();
+ if (res == NULL)
+ goto done;
+ if ((ref->scheme == NULL) && (ref->path == NULL) &&
+ ((ref->authority == NULL) && (ref->server == NULL))) {
+ if (bas->scheme != NULL)
+ res->scheme = g_strdup(bas->scheme);
+ if (bas->authority != NULL)
+ res->authority = g_strdup(bas->authority);
+ else if (bas->server != NULL) {
+ res->server = g_strdup(bas->server);
+ if (bas->user != NULL)
+ res->user = g_strdup(bas->user);
+ res->port = bas->port;
+ }
+ if (bas->path != NULL)
+ res->path = g_strdup(bas->path);
+ if (ref->query != NULL)
+ res->query = g_strdup (ref->query);
+ else if (bas->query != NULL)
+ res->query = g_strdup(bas->query);
+ if (ref->fragment != NULL)
+ res->fragment = g_strdup(ref->fragment);
+ goto step_7;
+ }
+
+ /*
+ * 3) If the scheme component is defined, indicating that the reference
+ * starts with a scheme name, then the reference is interpreted as an
+ * absolute URI and we are done. Otherwise, the reference URI's
+ * scheme is inherited from the base URI's scheme component.
+ */
+ if (ref->scheme != NULL) {
+ val = uri_to_string(ref);
+ goto done;
+ }
+ if (bas->scheme != NULL)
+ res->scheme = g_strdup(bas->scheme);
+
+ if (ref->query != NULL)
+ res->query = g_strdup(ref->query);
+ if (ref->fragment != NULL)
+ res->fragment = g_strdup(ref->fragment);
+
+ /*
+ * 4) If the authority component is defined, then the reference is a
+ * network-path and we skip to step 7. Otherwise, the reference
+ * URI's authority is inherited from the base URI's authority
+ * component, which will also be undefined if the URI scheme does not
+ * use an authority component.
+ */
+ if ((ref->authority != NULL) || (ref->server != NULL)) {
+ if (ref->authority != NULL)
+ res->authority = g_strdup(ref->authority);
+ else {
+ res->server = g_strdup(ref->server);
+ if (ref->user != NULL)
+ res->user = g_strdup(ref->user);
+ res->port = ref->port;
+ }
+ if (ref->path != NULL)
+ res->path = g_strdup(ref->path);
+ goto step_7;
+ }
+ if (bas->authority != NULL)
+ res->authority = g_strdup(bas->authority);
+ else if (bas->server != NULL) {
+ res->server = g_strdup(bas->server);
+ if (bas->user != NULL)
+ res->user = g_strdup(bas->user);
+ res->port = bas->port;
+ }
+
+ /*
+ * 5) If the path component begins with a slash character ("/"), then
+ * the reference is an absolute-path and we skip to step 7.
+ */
+ if ((ref->path != NULL) && (ref->path[0] == '/')) {
+ res->path = g_strdup(ref->path);
+ goto step_7;
+ }
+
+
+ /*
+ * 6) If this step is reached, then we are resolving a relative-path
+ * reference. The relative path needs to be merged with the base
+ * URI's path. Although there are many ways to do this, we will
+ * describe a simple method using a separate string buffer.
+ *
+ * Allocate a buffer large enough for the result string.
+ */
+ len = 2; /* extra / and 0 */
+ if (ref->path != NULL)
+ len += strlen(ref->path);
+ if (bas->path != NULL)
+ len += strlen(bas->path);
+ res->path = g_malloc(len);
+ res->path[0] = 0;
+
+ /*
+ * a) All but the last segment of the base URI's path component is
+ * copied to the buffer. In other words, any characters after the
+ * last (right-most) slash character, if any, are excluded.
+ */
+ cur = 0;
+ out = 0;
+ if (bas->path != NULL) {
+ while (bas->path[cur] != 0) {
+ while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
+ cur++;
+ if (bas->path[cur] == 0)
+ break;
+
+ cur++;
+ while (out < cur) {
+ res->path[out] = bas->path[out];
+ out++;
+ }
+ }
+ }
+ res->path[out] = 0;
+
+ /*
+ * b) The reference's path component is appended to the buffer
+ * string.
+ */
+ if (ref->path != NULL && ref->path[0] != 0) {
+ indx = 0;
+ /*
+ * Ensure the path includes a '/'
+ */
+ if ((out == 0) && (bas->server != NULL))
+ res->path[out++] = '/';
+ while (ref->path[indx] != 0) {
+ res->path[out++] = ref->path[indx++];
+ }
+ }
+ res->path[out] = 0;
+
+ /*
+ * Steps c) to h) are really path normalization steps
+ */
+ normalize_uri_path(res->path);
+
+step_7:
+
+ /*
+ * 7) The resulting URI components, including any inherited from the
+ * base URI, are recombined to give the absolute form of the URI
+ * reference.
+ */
+ val = uri_to_string(res);
+
+done:
+ if (ref != NULL)
+ uri_free(ref);
+ if (bas != NULL)
+ uri_free(bas);
+ if (res != NULL)
+ uri_free(res);
+ return(val);
+}
+
+/**
+ * uri_resolve_relative:
+ * @URI: the URI reference under consideration
+ * @base: the base value
+ *
+ * Expresses the URI of the reference in terms relative to the
+ * base. Some examples of this operation include:
+ * base = "http://site1.com/docs/book1.html"
+ * URI input URI returned
+ * docs/pic1.gif pic1.gif
+ * docs/img/pic1.gif img/pic1.gif
+ * img/pic1.gif ../img/pic1.gif
+ * http://site1.com/docs/pic1.gif pic1.gif
+ * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
+ *
+ * base = "docs/book1.html"
+ * URI input URI returned
+ * docs/pic1.gif pic1.gif
+ * docs/img/pic1.gif img/pic1.gif
+ * img/pic1.gif ../img/pic1.gif
+ * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
+ *
+ *
+ * Note: if the URI reference is really wierd or complicated, it may be
+ * worthwhile to first convert it into a "nice" one by calling
+ * uri_resolve (using 'base') before calling this routine,
+ * since this routine (for reasonable efficiency) assumes URI has
+ * already been through some validation.
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ * error.
+ */
+char *
+uri_resolve_relative (const char *uri, const char * base)
+{
+ char *val = NULL;
+ int ret;
+ int ix;
+ int pos = 0;
+ int nbslash = 0;
+ int len;
+ URI *ref = NULL;
+ URI *bas = NULL;
+ char *bptr, *uptr, *vptr;
+ int remove_path = 0;
+
+ if ((uri == NULL) || (*uri == 0))
+ return NULL;
+
+ /*
+ * First parse URI into a standard form
+ */
+ ref = uri_new ();
+ if (ref == NULL)
+ return NULL;
+ /* If URI not already in "relative" form */
+ if (uri[0] != '.') {
+ ret = uri_parse_into (ref, uri);
+ if (ret != 0)
+ goto done; /* Error in URI, return NULL */
+ } else
+ ref->path = g_strdup(uri);
+
+ /*
+ * Next parse base into the same standard form
+ */
+ if ((base == NULL) || (*base == 0)) {
+ val = g_strdup (uri);
+ goto done;
+ }
+ bas = uri_new ();
+ if (bas == NULL)
+ goto done;
+ if (base[0] != '.') {
+ ret = uri_parse_into (bas, base);
+ if (ret != 0)
+ goto done; /* Error in base, return NULL */
+ } else
+ bas->path = g_strdup(base);
+
+ /*
+ * If the scheme / server on the URI differs from the base,
+ * just return the URI
+ */
+ if ((ref->scheme != NULL) &&
+ ((bas->scheme == NULL) ||
+ (strcmp (bas->scheme, ref->scheme)) ||
+ (strcmp (bas->server, ref->server)))) {
+ val = g_strdup (uri);
+ goto done;
+ }
+ if (!strcmp(bas->path, ref->path)) {
+ val = g_strdup("");
+ goto done;
+ }
+ if (bas->path == NULL) {
+ val = g_strdup(ref->path);
+ goto done;
+ }
+ if (ref->path == NULL) {
+ ref->path = (char *) "/";
+ remove_path = 1;
+ }
+
+ /*
+ * At this point (at last!) we can compare the two paths
+ *
+ * First we take care of the special case where either of the
+ * two path components may be missing (bug 316224)
+ */
+ if (bas->path == NULL) {
+ if (ref->path != NULL) {
+ uptr = ref->path;
+ if (*uptr == '/')
+ uptr++;
+ /* exception characters from uri_to_string */
+ val = uri_string_escape(uptr, "/;&=+$,");
+ }
+ goto done;
+ }
+ bptr = bas->path;
+ if (ref->path == NULL) {
+ for (ix = 0; bptr[ix] != 0; ix++) {
+ if (bptr[ix] == '/')
+ nbslash++;
+ }
+ uptr = NULL;
+ len = 1; /* this is for a string terminator only */
+ } else {
+ /*
+ * Next we compare the two strings and find where they first differ
+ */
+ if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
+ pos += 2;
+ if ((*bptr == '.') && (bptr[1] == '/'))
+ bptr += 2;
+ else if ((*bptr == '/') && (ref->path[pos] != '/'))
+ bptr++;
+ while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
+ pos++;
+
+ if (bptr[pos] == ref->path[pos]) {
+ val = g_strdup("");
+ goto done; /* (I can't imagine why anyone would do this) */
+ }
+
+ /*
+ * In URI, "back up" to the last '/' encountered. This will be the
+ * beginning of the "unique" suffix of URI
+ */
+ ix = pos;
+ if ((ref->path[ix] == '/') && (ix > 0))
+ ix--;
+ else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
+ ix -= 2;
+ for (; ix > 0; ix--) {
+ if (ref->path[ix] == '/')
+ break;
+ }
+ if (ix == 0) {
+ uptr = ref->path;
+ } else {
+ ix++;
+ uptr = &ref->path[ix];
+ }
+
+ /*
+ * In base, count the number of '/' from the differing point
+ */
+ if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
+ for (; bptr[ix] != 0; ix++) {
+ if (bptr[ix] == '/')
+ nbslash++;
+ }
+ }
+ len = strlen (uptr) + 1;
+ }
+
+ if (nbslash == 0) {
+ if (uptr != NULL)
+ /* exception characters from uri_to_string */
+ val = uri_string_escape(uptr, "/;&=+$,");
+ goto done;
+ }
+
+ /*
+ * Allocate just enough space for the returned string -
+ * length of the remainder of the URI, plus enough space
+ * for the "../" groups, plus one for the terminator
+ */
+ val = g_malloc (len + 3 * nbslash);
+ vptr = val;
+ /*
+ * Put in as many "../" as needed
+ */
+ for (; nbslash>0; nbslash--) {
+ *vptr++ = '.';
+ *vptr++ = '.';
+ *vptr++ = '/';
+ }
+ /*
+ * Finish up with the end of the URI
+ */
+ if (uptr != NULL) {
+ if ((vptr > val) && (len > 0) &&
+ (uptr[0] == '/') && (vptr[-1] == '/')) {
+ memcpy (vptr, uptr + 1, len - 1);
+ vptr[len - 2] = 0;
+ } else {
+ memcpy (vptr, uptr, len);
+ vptr[len - 1] = 0;
+ }
+ } else {
+ vptr[len - 1] = 0;
+ }
+
+ /* escape the freshly-built path */
+ vptr = val;
+ /* exception characters from uri_to_string */
+ val = uri_string_escape(vptr, "/;&=+$,");
+ g_free(vptr);
+
+done:
+ /*
+ * Free the working variables
+ */
+ if (remove_path != 0)
+ ref->path = NULL;
+ if (ref != NULL)
+ uri_free (ref);
+ if (bas != NULL)
+ uri_free (bas);
+
+ return val;
+}
+
+/*
+ * Utility functions to help parse and assemble query strings.
+ */
+
+struct QueryParams *
+query_params_new (int init_alloc)
+{
+ struct QueryParams *ps;
+
+ if (init_alloc <= 0) init_alloc = 1;
+
+ ps = g_new(QueryParams, 1);
+ ps->n = 0;
+ ps->alloc = init_alloc;
+ ps->p = g_new(QueryParam, ps->alloc);
+
+ return ps;
+}
+
+/* Ensure there is space to store at least one more parameter
+ * at the end of the set.
+ */
+static int
+query_params_append (struct QueryParams *ps,
+ const char *name, const char *value)
+{
+ if (ps->n >= ps->alloc) {
+ ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2);
+ ps->alloc *= 2;
+ }
+
+ ps->p[ps->n].name = g_strdup(name);
+ ps->p[ps->n].value = value ? g_strdup(value) : NULL;
+ ps->p[ps->n].ignore = 0;
+ ps->n++;
+
+ return 0;
+}
+
+void
+query_params_free (struct QueryParams *ps)
+{
+ int i;
+
+ for (i = 0; i < ps->n; ++i) {
+ g_free (ps->p[i].name);
+ g_free (ps->p[i].value);
+ }
+ g_free (ps->p);
+ g_free (ps);
+}
+
+struct QueryParams *
+query_params_parse (const char *query)
+{
+ struct QueryParams *ps;
+ const char *end, *eq;
+
+ ps = query_params_new (0);
+ if (!query || query[0] == '\0') return ps;
+
+ while (*query) {
+ char *name = NULL, *value = NULL;
+
+ /* Find the next separator, or end of the string. */
+ end = strchr (query, '&');
+ if (!end)
+ end = strchr (query, ';');
+ if (!end)
+ end = query + strlen (query);
+
+ /* Find the first '=' character between here and end. */
+ eq = strchr (query, '=');
+ if (eq && eq >= end) eq = NULL;
+
+ /* Empty section (eg. "&&"). */
+ if (end == query)
+ goto next;
+
+ /* If there is no '=' character, then we have just "name"
+ * and consistent with CGI.pm we assume value is "".
+ */
+ else if (!eq) {
+ name = uri_string_unescape (query, end - query, NULL);
+ value = NULL;
+ }
+ /* Or if we have "name=" here (works around annoying
+ * problem when calling uri_string_unescape with len = 0).
+ */
+ else if (eq+1 == end) {
+ name = uri_string_unescape (query, eq - query, NULL);
+ value = g_new0(char, 1);
+ }
+ /* If the '=' character is at the beginning then we have
+ * "=value" and consistent with CGI.pm we _ignore_ this.
+ */
+ else if (query == eq)
+ goto next;
+
+ /* Otherwise it's "name=value". */
+ else {
+ name = uri_string_unescape (query, eq - query, NULL);
+ value = uri_string_unescape (eq+1, end - (eq+1), NULL);
+ }
+
+ /* Append to the parameter set. */
+ query_params_append (ps, name, value);
+ g_free(name);
+ g_free(value);
+
+ next:
+ query = end;
+ if (*query) query ++; /* skip '&' separator */
+ }
+
+ return ps;
+}
diff --git a/uri.h b/uri.h
new file mode 100644
index 0000000000..de99b3bd4b
--- /dev/null
+++ b/uri.h
@@ -0,0 +1,113 @@
+/**
+ * Summary: library of generic URI related routines
+ * Description: library of generic URI related routines
+ * Implements RFC 2396
+ *
+ * Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Daniel Veillard shall not
+ * be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization from him.
+ *
+ * Author: Daniel Veillard
+ **
+ * Copyright (C) 2007 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors:
+ * Richard W.M. Jones <rjones@redhat.com>
+ *
+ * Utility functions to help parse and assemble query strings.
+ */
+
+#ifndef QEMU_URI_H
+#define QEMU_URI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * URI:
+ *
+ * A parsed URI reference. This is a struct containing the various fields
+ * as described in RFC 2396 but separated for further processing.
+ */
+typedef struct URI {
+ char *scheme; /* the URI scheme */
+ char *opaque; /* opaque part */
+ char *authority; /* the authority part */
+ char *server; /* the server part */
+ char *user; /* the user part */
+ int port; /* the port number */
+ char *path; /* the path string */
+ char *fragment; /* the fragment identifier */
+ int cleanup; /* parsing potentially unclean URI */
+ char *query; /* the query string (as it appears in the URI) */
+} URI;
+
+URI *uri_new(void);
+char *uri_resolve(const char *URI, const char *base);
+char *uri_resolve_relative(const char *URI, const char *base);
+URI *uri_parse(const char *str);
+URI *uri_parse_raw(const char *str, int raw);
+int uri_parse_into(URI *uri, const char *str);
+char *uri_to_string(URI *uri);
+char *uri_string_escape(const char *str, const char *list);
+char *uri_string_unescape(const char *str, int len, char *target);
+void uri_free(URI *uri);
+
+/* Single web service query parameter 'name=value'. */
+typedef struct QueryParam {
+ char *name; /* Name (unescaped). */
+ char *value; /* Value (unescaped). */
+ int ignore; /* Ignore this field in qparam_get_query */
+} QueryParam;
+
+/* Set of parameters. */
+typedef struct QueryParams {
+ int n; /* number of parameters used */
+ int alloc; /* allocated space */
+ QueryParam *p; /* array of parameters */
+} QueryParams;
+
+struct QueryParams *query_params_new (int init_alloc);
+int query_param_append (QueryParams *ps, const char *name, const char *value);
+extern char *query_param_to_string (const QueryParams *ps);
+extern QueryParams *query_params_parse (const char *query);
+extern void query_params_free (QueryParams *ps);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* QEMU_URI_H */