-rw-r--r--  MAINTAINERS | 8
-rw-r--r--  Makefile | 1
-rw-r--r--  Makefile.objs | 2
-rw-r--r--  backends/cryptodev-builtin.c | 69
-rw-r--r--  backends/cryptodev.c | 34
-rw-r--r--  block/Makefile.objs | 6
-rw-r--r--  block/blkdebug.c | 86
-rw-r--r--  block/blkverify.c | 201
-rw-r--r--  block/file-posix.c (renamed from block/raw-posix.c) | 0
-rw-r--r--  block/file-win32.c (renamed from block/raw-win32.c) | 0
-rw-r--r--  block/gluster.c | 4
-rw-r--r--  block/quorum.c | 410
-rw-r--r--  block/raw-format.c (renamed from block/raw_bsd.c) | 2
-rw-r--r--  block/trace-events | 4
-rwxr-xr-x  configure | 2
-rw-r--r--  contrib/libvhost-user/Makefile.objs | 1
-rw-r--r--  contrib/libvhost-user/libvhost-user.c | 1499
-rw-r--r--  contrib/libvhost-user/libvhost-user.h | 435
-rw-r--r--  docs/pcie.txt | 12
-rw-r--r--  docs/replay.txt | 14
-rw-r--r--  docs/specs/vhost-user.txt | 16
-rw-r--r--  exec.c | 33
-rw-r--r--  hw/acpi/Makefile.objs | 2
-rw-r--r--  hw/acpi/ich9.c | 3
-rw-r--r--  hw/acpi/memory_hotplug.c | 420
-rw-r--r--  hw/acpi/memory_hotplug_acpi_table.c | 262
-rw-r--r--  hw/acpi/piix4.c | 3
-rw-r--r--  hw/arm/pxa2xx.c | 4
-rw-r--r--  hw/arm/tosa.c | 4
-rw-r--r--  hw/arm/virt-acpi-build.c | 134
-rw-r--r--  hw/arm/virt.c | 691
-rw-r--r--  hw/arm/z2.c | 4
-rw-r--r--  hw/audio/wm8750.c | 4
-rw-r--r--  hw/block/m25p80.c | 29
-rw-r--r--  hw/block/virtio-blk.c | 2
-rw-r--r--  hw/char/exynos4210_uart.c | 16
-rw-r--r--  hw/char/virtio-serial-bus.c | 3
-rw-r--r--  hw/display/ssd0303.c | 4
-rw-r--r--  hw/gpio/max7310.c | 4
-rw-r--r--  hw/i2c/core.c | 31
-rw-r--r--  hw/i2c/i2c-ddc.c | 4
-rw-r--r--  hw/i2c/smbus.c | 13
-rw-r--r--  hw/i386/acpi-build.c | 206
-rw-r--r--  hw/i386/amd_iommu.c | 2
-rw-r--r--  hw/i386/amd_iommu.h | 4
-rw-r--r--  hw/i386/intel_iommu.c | 114
-rw-r--r--  hw/i386/intel_iommu_internal.h | 13
-rw-r--r--  hw/i386/x86-iommu.c | 17
-rw-r--r--  hw/input/lm832x.c | 4
-rw-r--r--  hw/misc/tmp105.c | 3
-rw-r--r--  hw/net/e1000e.c | 3
-rw-r--r--  hw/net/fsl_etsec/rings.c | 19
-rw-r--r--  hw/net/rtl8139.c | 34
-rw-r--r--  hw/net/vhost_net.c | 18
-rw-r--r--  hw/net/virtio-net.c | 45
-rw-r--r--  hw/pci-bridge/ioh3420.c | 4
-rw-r--r--  hw/pci-bridge/xio3130_downstream.c | 4
-rw-r--r--  hw/pci-bridge/xio3130_upstream.c | 4
-rw-r--r--  hw/pci/pci.c | 4
-rw-r--r--  hw/pci/pcie.c | 15
-rw-r--r--  hw/pci/pcie_aer.c | 19
-rw-r--r--  hw/s390x/virtio-ccw.c | 4
-rw-r--r--  hw/scsi/virtio-scsi.c | 4
-rw-r--r--  hw/ssi/imx_spi.c | 11
-rw-r--r--  hw/timer/ds1338.c | 4
-rw-r--r--  hw/timer/twl92230.c | 4
-rw-r--r--  hw/virtio/trace-events | 2
-rw-r--r--  hw/virtio/vhost-user.c | 34
-rw-r--r--  hw/virtio/vhost.c | 1
-rw-r--r--  hw/virtio/virtio-balloon.c | 7
-rw-r--r--  hw/virtio/virtio-bus.c | 8
-rw-r--r--  hw/virtio/virtio-crypto-pci.c | 6
-rw-r--r--  hw/virtio/virtio-crypto.c | 41
-rw-r--r--  hw/virtio/virtio-mmio.c | 2
-rw-r--r--  hw/virtio/virtio-pci.c | 21
-rw-r--r--  hw/virtio/virtio-pci.h | 4
-rw-r--r--  hw/virtio/virtio.c | 108
-rw-r--r--  include/block/block_int.h | 2
-rw-r--r--  include/exec/memory.h | 8
-rw-r--r--  include/hw/acpi/acpi-defs.h | 45
-rw-r--r--  include/hw/acpi/memory_hotplug.h | 12
-rw-r--r--  include/hw/acpi/pc-hotplug.h | 23
-rw-r--r--  include/hw/arm/virt-acpi-build.h | 47
-rw-r--r--  include/hw/arm/virt.h | 41
-rw-r--r--  include/hw/i2c/i2c.h | 16
-rw-r--r--  include/hw/i386/x86-iommu.h | 1
-rw-r--r--  include/hw/pci/pcie.h | 4
-rw-r--r--  include/hw/pci/pcie_aer.h | 4
-rw-r--r--  include/hw/virtio/vhost-backend.h | 2
-rw-r--r--  include/hw/virtio/virtio-access.h | 31
-rw-r--r--  include/hw/virtio/virtio-bus.h | 1
-rw-r--r--  include/hw/virtio/virtio-net.h | 1
-rw-r--r--  include/hw/virtio/virtio.h | 11
-rw-r--r--  include/migration/vmstate.h | 7
-rw-r--r--  include/net/vhost_net.h | 2
-rw-r--r--  include/qemu/coroutine.h | 6
-rw-r--r--  include/standard-headers/linux/pci_regs.h | 1
-rw-r--r--  include/sysemu/cryptodev.h | 42
-rw-r--r--  include/sysemu/replay.h | 12
-rw-r--r--  memory.c | 9
-rw-r--r--  migration/savevm.c | 34
-rw-r--r--  net/Makefile.objs | 1
-rw-r--r--  net/filter-replay.c | 92
-rw-r--r--  qemu-img.c | 17
-rw-r--r--  replay/Makefile.objs | 1
-rw-r--r--  replay/replay-events.c | 11
-rw-r--r--  replay/replay-internal.h | 10
-rw-r--r--  replay/replay-net.c | 102
-rw-r--r--  replay/replay.c | 2
-rw-r--r--  tcg/mips/tcg-target.h | 60
-rw-r--r--  tcg/mips/tcg-target.inc.c | 1170
-rw-r--r--  tests/Makefile.include | 2
-rw-r--r--  tests/acpi-test-data/pc/DSDT | bin 6008 -> 5098 bytes
-rw-r--r--  tests/acpi-test-data/pc/DSDT.bridge | bin 7867 -> 6957 bytes
-rw-r--r--  tests/acpi-test-data/pc/DSDT.cphp | bin 6471 -> 5561 bytes
-rw-r--r--  tests/acpi-test-data/pc/DSDT.ipmikcs | bin 6080 -> 5170 bytes
-rw-r--r--  tests/acpi-test-data/pc/DSDT.memhp | bin 0 -> 6463 bytes
-rw-r--r--  tests/acpi-test-data/pc/SRAT.memhp | bin 0 -> 224 bytes
-rw-r--r--  tests/acpi-test-data/q35/DSDT | bin 8770 -> 7860 bytes
-rw-r--r--  tests/acpi-test-data/q35/DSDT.bridge | bin 8787 -> 7877 bytes
-rw-r--r--  tests/acpi-test-data/q35/DSDT.cphp | bin 9233 -> 8323 bytes
-rw-r--r--  tests/acpi-test-data/q35/DSDT.ipmibt | bin 8845 -> 7935 bytes
-rw-r--r--  tests/acpi-test-data/q35/DSDT.memhp | bin 0 -> 9225 bytes
-rw-r--r--  tests/acpi-test-data/q35/SRAT.memhp | bin 0 -> 224 bytes
-rw-r--r--  tests/bios-tables-test.c | 24
-rw-r--r--  tests/qemu-iotests/071.out | 8
-rw-r--r--  tests/vhost-user-bridge.c | 1183
-rw-r--r--  translate-all.c | 2
-rw-r--r--  util/qemu-coroutine.c | 7
-rw-r--r--  vl.c | 3
130 files changed, 5607 insertions, 2719 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 585cd5abd7..1444b26dc0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -508,7 +508,6 @@ M: Shannon Zhao <shannon.zhao@linaro.org>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/virt-acpi-build.c
-F: include/hw/arm/virt-acpi-build.h
STM32F205
M: Alistair Francis <alistair@alistair23.me>
@@ -885,7 +884,6 @@ F: hw/acpi/*
F: hw/smbios/*
F: hw/i386/acpi-build.[hc]
F: hw/arm/virt-acpi-build.c
-F: include/hw/arm/virt-acpi-build.h
ppc4xx
M: Alexander Graf <agraf@suse.de>
@@ -1720,9 +1718,9 @@ L: qemu-block@nongnu.org
S: Supported
F: block/linux-aio.c
F: include/block/raw-aio.h
-F: block/raw-posix.c
-F: block/raw-win32.c
-F: block/raw_bsd.c
+F: block/raw-format.c
+F: block/file-posix.c
+F: block/file-win32.c
F: block/win32-aio.c
qcow2
diff --git a/Makefile b/Makefile
index 214cbad35d..1a8bfb225c 100644
--- a/Makefile
+++ b/Makefile
@@ -149,6 +149,7 @@ dummy := $(call unnest-vars,, \
qga-obj-y \
ivshmem-client-obj-y \
ivshmem-server-obj-y \
+ libvhost-user-obj-y \
qga-vss-dll-obj-y \
block-obj-y \
block-obj-m \
diff --git a/Makefile.objs b/Makefile.objs
index 51c36a4d54..01cef866e4 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -115,7 +115,7 @@ qga-vss-dll-obj-y = qga/
# contrib
ivshmem-client-obj-y = contrib/ivshmem-client/
ivshmem-server-obj-y = contrib/ivshmem-server/
-
+libvhost-user-obj-y = contrib/libvhost-user/
######################################################################
trace-events-y = trace-events
diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c
index eda954b2a2..82a068e792 100644
--- a/backends/cryptodev-builtin.c
+++ b/backends/cryptodev-builtin.c
@@ -94,6 +94,8 @@ static void cryptodev_builtin_init(
backend->conf.max_size = LONG_MAX - sizeof(CryptoDevBackendSymOpInfo);
backend->conf.max_cipher_key_len = CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN;
backend->conf.max_auth_key_len = CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN;
+
+ cryptodev_backend_set_ready(backend, true);
}
static int
@@ -111,23 +113,42 @@ cryptodev_builtin_get_unused_session_index(
return -1;
}
+#define AES_KEYSIZE_128 16
+#define AES_KEYSIZE_192 24
+#define AES_KEYSIZE_256 32
+#define AES_KEYSIZE_128_XTS AES_KEYSIZE_256
+#define AES_KEYSIZE_256_XTS 64
+
static int
-cryptodev_builtin_get_aes_algo(uint32_t key_len, Error **errp)
+cryptodev_builtin_get_aes_algo(uint32_t key_len, int mode, Error **errp)
{
int algo;
- if (key_len == 128 / 8) {
+ if (key_len == AES_KEYSIZE_128) {
algo = QCRYPTO_CIPHER_ALG_AES_128;
- } else if (key_len == 192 / 8) {
+ } else if (key_len == AES_KEYSIZE_192) {
algo = QCRYPTO_CIPHER_ALG_AES_192;
- } else if (key_len == 256 / 8) {
- algo = QCRYPTO_CIPHER_ALG_AES_256;
+ } else if (key_len == AES_KEYSIZE_256) { /* equals AES_KEYSIZE_128_XTS */
+ if (mode == QCRYPTO_CIPHER_MODE_XTS) {
+ algo = QCRYPTO_CIPHER_ALG_AES_128;
+ } else {
+ algo = QCRYPTO_CIPHER_ALG_AES_256;
+ }
+ } else if (key_len == AES_KEYSIZE_256_XTS) {
+ if (mode == QCRYPTO_CIPHER_MODE_XTS) {
+ algo = QCRYPTO_CIPHER_ALG_AES_256;
+ } else {
+ goto err;
+ }
} else {
- error_setg(errp, "Unsupported key length :%u", key_len);
- return -1;
+ goto err;
}
return algo;
+
+err:
+ error_setg(errp, "Unsupported key length :%u", key_len);
+ return -1;
}
static int cryptodev_builtin_create_cipher_session(
@@ -155,32 +176,48 @@ static int cryptodev_builtin_create_cipher_session(
switch (sess_info->cipher_alg) {
case VIRTIO_CRYPTO_CIPHER_AES_ECB:
+ mode = QCRYPTO_CIPHER_MODE_ECB;
algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
- errp);
+ mode, errp);
if (algo < 0) {
return -1;
}
- mode = QCRYPTO_CIPHER_MODE_ECB;
break;
case VIRTIO_CRYPTO_CIPHER_AES_CBC:
+ mode = QCRYPTO_CIPHER_MODE_CBC;
algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
- errp);
+ mode, errp);
if (algo < 0) {
return -1;
}
- mode = QCRYPTO_CIPHER_MODE_CBC;
break;
case VIRTIO_CRYPTO_CIPHER_AES_CTR:
+ mode = QCRYPTO_CIPHER_MODE_CTR;
algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
- errp);
+ mode, errp);
+ if (algo < 0) {
+ return -1;
+ }
+ break;
+ case VIRTIO_CRYPTO_CIPHER_AES_XTS:
+ mode = QCRYPTO_CIPHER_MODE_XTS;
+ algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
+ mode, errp);
if (algo < 0) {
return -1;
}
- mode = QCRYPTO_CIPHER_MODE_CTR;
break;
- case VIRTIO_CRYPTO_CIPHER_DES_ECB:
- algo = QCRYPTO_CIPHER_ALG_DES_RFB;
+ case VIRTIO_CRYPTO_CIPHER_3DES_ECB:
mode = QCRYPTO_CIPHER_MODE_ECB;
+ algo = QCRYPTO_CIPHER_ALG_3DES;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_3DES_CBC:
+ mode = QCRYPTO_CIPHER_MODE_CBC;
+ algo = QCRYPTO_CIPHER_ALG_3DES;
+ break;
+ case VIRTIO_CRYPTO_CIPHER_3DES_CTR:
+ mode = QCRYPTO_CIPHER_MODE_CTR;
+ algo = QCRYPTO_CIPHER_ALG_3DES;
break;
default:
error_setg(errp, "Unsupported cipher alg :%u",
@@ -331,6 +368,8 @@ static void cryptodev_builtin_cleanup(
backend->conf.peers.ccs[i] = NULL;
}
}
+
+ cryptodev_backend_set_ready(backend, false);
}
static void
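
The XTS handling above is the subtle part of this hunk: XTS consumes two equal-size keys, so 32 bytes of key material selects AES-128 and 64 bytes selects AES-256, while the same 32 bytes in a non-XTS mode means AES-256. A minimal standalone sketch of that mapping (the helper name is mine, not part of the patch):

    /* XTS splits the supplied key material into two equal keys, so the
     * effective AES key size is half of what the guest hands over. */
    static int aes_bits_for_key(unsigned int key_len, int is_xts)
    {
        if (is_xts) {
            if (key_len == 32) {
                return 128;    /* 2 x 16-byte keys */
            }
            if (key_len == 64) {
                return 256;    /* 2 x 32-byte keys */
            }
            return -1;         /* no AES-192-XTS, matching the patch */
        }
        switch (key_len) {
        case 16: return 128;
        case 24: return 192;
        case 32: return 256;
        default: return -1;
        }
    }
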
diff --git a/backends/cryptodev.c b/backends/cryptodev.c
index 4a49f9762f..832f056266 100644
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -73,8 +73,6 @@ void cryptodev_backend_cleanup(
if (bc->cleanup) {
bc->cleanup(backend, errp);
}
-
- backend->ready = false;
}
int64_t cryptodev_backend_sym_create_session(
@@ -189,14 +187,39 @@ cryptodev_backend_complete(UserCreatable *uc, Error **errp)
goto out;
}
}
- backend->ready = true;
+
return;
out:
- backend->ready = false;
error_propagate(errp, local_err);
}
+void cryptodev_backend_set_used(CryptoDevBackend *backend, bool used)
+{
+ backend->is_used = used;
+}
+
+bool cryptodev_backend_is_used(CryptoDevBackend *backend)
+{
+ return backend->is_used;
+}
+
+void cryptodev_backend_set_ready(CryptoDevBackend *backend, bool ready)
+{
+ backend->ready = ready;
+}
+
+bool cryptodev_backend_is_ready(CryptoDevBackend *backend)
+{
+ return backend->ready;
+}
+
+static bool
+cryptodev_backend_can_be_deleted(UserCreatable *uc, Error **errp)
+{
+ return !cryptodev_backend_is_used(CRYPTODEV_BACKEND(uc));
+}
+
static void cryptodev_backend_instance_init(Object *obj)
{
object_property_add(obj, "queues", "int",
@@ -209,7 +232,9 @@ static void cryptodev_backend_instance_init(Object *obj)
static void cryptodev_backend_finalize(Object *obj)
{
+ CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
+ cryptodev_backend_cleanup(backend, NULL);
}
static void
@@ -218,6 +243,7 @@ cryptodev_backend_class_init(ObjectClass *oc, void *data)
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
ucc->complete = cryptodev_backend_complete;
+ ucc->can_be_deleted = cryptodev_backend_can_be_deleted;
QTAILQ_INIT(&crypto_clients);
}
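
The new accessors split "a frontend owns this backend" (is_used) from "the backend finished initializing" (ready), and the can_be_deleted hook keeps object_del from removing a backend that a device still references. A minimal sketch of the intended pairing, assuming a frontend's realize/unrealize path (the my_frontend_* names are illustrative; only the cryptodev_backend_* calls come from this patch):

    /* Assumes "sysemu/cryptodev.h" and "qapi/error.h". */
    static void my_frontend_realize(CryptoDevBackend *backend, Error **errp)
    {
        if (cryptodev_backend_is_used(backend)) {
            error_setg(errp, "cryptodev backend already in use");
            return;
        }
        cryptodev_backend_set_used(backend, true);  /* blocks object_del */
    }

    static void my_frontend_unrealize(CryptoDevBackend *backend)
    {
        cryptodev_backend_set_used(backend, false); /* deletable again */
    }
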
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 67a036a1df..0b8fd06f27 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,4 +1,4 @@
-block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
+block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
@@ -6,8 +6,8 @@ block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
block-obj-y += quorum.o
block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
block-obj-y += block-backend.o snapshot.o qapi.o
-block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
-block-obj-$(CONFIG_POSIX) += raw-posix.o
+block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
+block-obj-$(CONFIG_POSIX) += file-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
block-obj-y += null.o mirror.o commit.o io.o
block-obj-y += throttle-groups.o
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 4127571454..acccf85666 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -58,10 +58,6 @@ typedef struct BlkdebugSuspendedReq {
QLIST_ENTRY(BlkdebugSuspendedReq) next;
} BlkdebugSuspendedReq;
-static const AIOCBInfo blkdebug_aiocb_info = {
- .aiocb_size = sizeof(BlkdebugAIOCB),
-};
-
enum {
ACTION_INJECT_ERROR,
ACTION_SET_STATE,
@@ -77,7 +73,7 @@ typedef struct BlkdebugRule {
int error;
int immediately;
int once;
- int64_t sector;
+ int64_t offset;
} inject;
struct {
int new_state;
@@ -174,6 +170,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
const char* event_name;
BlkdebugEvent event;
struct BlkdebugRule *rule;
+ int64_t sector;
/* Find the right event for the rule */
event_name = qemu_opt_get(opts, "event");
@@ -200,7 +197,9 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
rule->options.inject.once = qemu_opt_get_bool(opts, "once", 0);
rule->options.inject.immediately =
qemu_opt_get_bool(opts, "immediately", 0);
- rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
+ sector = qemu_opt_get_number(opts, "sector", -1);
+ rule->options.inject.offset =
+ sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
break;
case ACTION_SET_STATE:
@@ -408,17 +407,14 @@ out:
static void error_callback_bh(void *opaque)
{
- struct BlkdebugAIOCB *acb = opaque;
- acb->common.cb(acb->common.opaque, acb->ret);
- qemu_aio_unref(acb);
+ Coroutine *co = opaque;
+ qemu_coroutine_enter(co);
}
-static BlockAIOCB *inject_error(BlockDriverState *bs,
- BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
+static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
{
BDRVBlkdebugState *s = bs->opaque;
int error = rule->options.inject.error;
- struct BlkdebugAIOCB *acb;
bool immediately = rule->options.inject.immediately;
if (rule->options.inject.once) {
@@ -426,81 +422,79 @@ static BlockAIOCB *inject_error(BlockDriverState *bs,
remove_rule(rule);
}
- if (immediately) {
- return NULL;
+ if (!immediately) {
+ aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
+ qemu_coroutine_self());
+ qemu_coroutine_yield();
}
- acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
- acb->ret = -error;
-
- aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb);
-
- return &acb->common;
+ return -error;
}
-static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugRule *rule = NULL;
QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
- if (rule->options.inject.sector == -1 ||
- (rule->options.inject.sector >= sector_num &&
- rule->options.inject.sector < sector_num + nb_sectors)) {
+ uint64_t inject_offset = rule->options.inject.offset;
+
+ if (inject_offset == -1 ||
+ (inject_offset >= offset && inject_offset < offset + bytes))
+ {
break;
}
}
if (rule && rule->options.inject.error) {
- return inject_error(bs, cb, opaque, rule);
+ return inject_error(bs, rule);
}
- return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
- cb, opaque);
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
-static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugRule *rule = NULL;
QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
- if (rule->options.inject.sector == -1 ||
- (rule->options.inject.sector >= sector_num &&
- rule->options.inject.sector < sector_num + nb_sectors)) {
+ uint64_t inject_offset = rule->options.inject.offset;
+
+ if (inject_offset == -1 ||
+ (inject_offset >= offset && inject_offset < offset + bytes))
+ {
break;
}
}
if (rule && rule->options.inject.error) {
- return inject_error(bs, cb, opaque, rule);
+ return inject_error(bs, rule);
}
- return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
- cb, opaque);
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
}
-static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
- BlockCompletionFunc *cb, void *opaque)
+static int blkdebug_co_flush(BlockDriverState *bs)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugRule *rule = NULL;
QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
- if (rule->options.inject.sector == -1) {
+ if (rule->options.inject.offset == -1) {
break;
}
}
if (rule && rule->options.inject.error) {
- return inject_error(bs, cb, opaque, rule);
+ return inject_error(bs, rule);
}
- return bdrv_aio_flush(bs->file->bs, cb, opaque);
+ return bdrv_co_flush(bs->file->bs);
}
@@ -752,9 +746,9 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_refresh_filename = blkdebug_refresh_filename,
.bdrv_refresh_limits = blkdebug_refresh_limits,
- .bdrv_aio_readv = blkdebug_aio_readv,
- .bdrv_aio_writev = blkdebug_aio_writev,
- .bdrv_aio_flush = blkdebug_aio_flush,
+ .bdrv_co_preadv = blkdebug_co_preadv,
+ .bdrv_co_pwritev = blkdebug_co_pwritev,
+ .bdrv_co_flush_to_disk = blkdebug_co_flush,
.bdrv_debug_event = blkdebug_debug_event,
.bdrv_debug_breakpoint = blkdebug_debug_breakpoint,
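
The conversion drops the AIOCB boilerplate in favour of the standard coroutine idiom: to delay an injected error, the request parks itself on a one-shot bottom half and yields, and the bottom half re-enters it on the next main-loop iteration. A condensed sketch of that idiom, using only the APIs visible in this hunk (assumes "qemu/coroutine.h" and the block layer headers):

    static void wake_co_bh(void *opaque)
    {
        qemu_coroutine_enter(opaque);          /* resume the parked request */
    }

    static int coroutine_fn delay_then_fail(BlockDriverState *bs, int error)
    {
        aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
                                wake_co_bh, qemu_coroutine_self());
        qemu_coroutine_yield();                /* wake_co_bh re-enters here */
        return -error;
    }
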
diff --git a/block/blkverify.c b/block/blkverify.c
index 28f9af6dba..43a940c2f5 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -19,38 +19,36 @@ typedef struct {
BdrvChild *test_file;
} BDRVBlkverifyState;
-typedef struct BlkverifyAIOCB BlkverifyAIOCB;
-struct BlkverifyAIOCB {
- BlockAIOCB common;
+typedef struct BlkverifyRequest {
+ Coroutine *co;
+ BlockDriverState *bs;
/* Request metadata */
bool is_write;
- int64_t sector_num;
- int nb_sectors;
+ uint64_t offset;
+ uint64_t bytes;
+ int flags;
- int ret; /* first completed request's result */
- unsigned int done; /* completion counter */
+ int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *,
+ BdrvRequestFlags);
- QEMUIOVector *qiov; /* user I/O vector */
- QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */
- void *buf; /* buffer for raw file I/O */
+ int ret; /* test image result */
+ int raw_ret; /* raw image result */
- void (*verify)(BlkverifyAIOCB *acb);
-};
+ unsigned int done; /* completion counter */
-static const AIOCBInfo blkverify_aiocb_info = {
- .aiocb_size = sizeof(BlkverifyAIOCB),
-};
+ QEMUIOVector *qiov; /* user I/O vector */
+ QEMUIOVector *raw_qiov; /* cloned I/O vector for raw file */
+} BlkverifyRequest;
-static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
+static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r,
const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
- fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ",
- acb->is_write ? "write" : "read", acb->sector_num,
- acb->nb_sectors);
+ fprintf(stderr, "blkverify: %s offset=%" PRId64 " bytes=%" PRId64 " ",
+ r->is_write ? "write" : "read", r->offset, r->bytes);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
va_end(ap);
@@ -166,113 +164,106 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
return bdrv_getlength(s->test_file->bs);
}
-static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
- int64_t sector_num, QEMUIOVector *qiov,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
+static void coroutine_fn blkverify_do_test_req(void *opaque)
{
- BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);
-
- acb->is_write = is_write;
- acb->sector_num = sector_num;
- acb->nb_sectors = nb_sectors;
- acb->ret = -EINPROGRESS;
- acb->done = 0;
- acb->qiov = qiov;
- acb->buf = NULL;
- acb->verify = NULL;
- return acb;
+ BlkverifyRequest *r = opaque;
+ BDRVBlkverifyState *s = r->bs->opaque;
+
+ r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov,
+ r->flags);
+ r->done++;
+ qemu_coroutine_enter_if_inactive(r->co);
}
-static void blkverify_aio_bh(void *opaque)
+static void coroutine_fn blkverify_do_raw_req(void *opaque)
{
- BlkverifyAIOCB *acb = opaque;
+ BlkverifyRequest *r = opaque;
- if (acb->buf) {
- qemu_iovec_destroy(&acb->raw_qiov);
- qemu_vfree(acb->buf);
- }
- acb->common.cb(acb->common.opaque, acb->ret);
- qemu_aio_unref(acb);
+ r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov,
+ r->flags);
+ r->done++;
+ qemu_coroutine_enter_if_inactive(r->co);
}
-static void blkverify_aio_cb(void *opaque, int ret)
+static int coroutine_fn
+blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov,
+ int flags, bool is_write)
{
- BlkverifyAIOCB *acb = opaque;
-
- switch (++acb->done) {
- case 1:
- acb->ret = ret;
- break;
-
- case 2:
- if (acb->ret != ret) {
- blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
- }
-
- if (acb->verify) {
- acb->verify(acb);
- }
+ Coroutine *co_a, *co_b;
+
+ *r = (BlkverifyRequest) {
+ .co = qemu_coroutine_self(),
+ .bs = bs,
+ .offset = offset,
+ .bytes = bytes,
+ .qiov = qiov,
+ .raw_qiov = raw_qiov,
+ .flags = flags,
+ .is_write = is_write,
+ .request_fn = is_write ? bdrv_co_pwritev : bdrv_co_preadv,
+ };
+
+ co_a = qemu_coroutine_create(blkverify_do_test_req, r);
+ co_b = qemu_coroutine_create(blkverify_do_raw_req, r);
+
+ qemu_coroutine_enter(co_a);
+ qemu_coroutine_enter(co_b);
+
+ while (r->done < 2) {
+ qemu_coroutine_yield();
+ }
- aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
- blkverify_aio_bh, acb);
- break;
+ if (r->ret != r->raw_ret) {
+ blkverify_err(r, "return value mismatch %d != %d", r->ret, r->raw_ret);
}
+
+ return r->ret;
}
-static void blkverify_verify_readv(BlkverifyAIOCB *acb)
+static int coroutine_fn
+blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
- ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
- if (offset != -1) {
- blkverify_err(acb, "contents mismatch in sector %" PRId64,
- acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
+ BlkverifyRequest r;
+ QEMUIOVector raw_qiov;
+ void *buf;
+ ssize_t cmp_offset;
+ int ret;
+
+ buf = qemu_blockalign(bs->file->bs, qiov->size);
+ qemu_iovec_init(&raw_qiov, qiov->niov);
+ qemu_iovec_clone(&raw_qiov, qiov, buf);
+
+ ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
+ false);
+
+ cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
+ if (cmp_offset != -1) {
+ blkverify_err(&r, "contents mismatch at offset %" PRId64,
+ offset + cmp_offset);
}
-}
-static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- BDRVBlkverifyState *s = bs->opaque;
- BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
- nb_sectors, cb, opaque);
-
- acb->verify = blkverify_verify_readv;
- acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
- qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
- qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
-
- bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
- blkverify_aio_cb, acb);
- bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
- blkverify_aio_cb, acb);
- return &acb->common;
+ qemu_iovec_destroy(&raw_qiov);
+ qemu_vfree(buf);
+
+ return ret;
}
-static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
- BDRVBlkverifyState *s = bs->opaque;
- BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
- nb_sectors, cb, opaque);
-
- bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
- blkverify_aio_cb, acb);
- bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
- blkverify_aio_cb, acb);
- return &acb->common;
+ BlkverifyRequest r;
+ return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
}
-static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
- BlockCompletionFunc *cb,
- void *opaque)
+static int blkverify_co_flush(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
/* Only flush test file, the raw file is not important */
- return bdrv_aio_flush(s->test_file->bs, cb, opaque);
+ return bdrv_co_flush(s->test_file->bs);
}
static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -332,9 +323,9 @@ static BlockDriver bdrv_blkverify = {
.bdrv_getlength = blkverify_getlength,
.bdrv_refresh_filename = blkverify_refresh_filename,
- .bdrv_aio_readv = blkverify_aio_readv,
- .bdrv_aio_writev = blkverify_aio_writev,
- .bdrv_aio_flush = blkverify_aio_flush,
+ .bdrv_co_preadv = blkverify_co_preadv,
+ .bdrv_co_pwritev = blkverify_co_pwritev,
+ .bdrv_co_flush = blkverify_co_flush,
.is_filter = true,
.bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
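
The coroutine version still issues the test and raw requests in parallel: the caller spawns one child coroutine per image, and each child bumps a completion counter and re-enters the caller only if it is actually suspended. A stripped-down sketch of that fan-out, assuming QEMU's coroutine headers (the PairRequest/one_side/run_both names are mine):

    typedef struct {
        Coroutine *caller;
        unsigned int done;
    } PairRequest;

    static void coroutine_fn one_side(void *opaque)
    {
        PairRequest *r = opaque;
        /* ... perform this side's I/O, possibly yielding ... */
        r->done++;
        /* Only re-enter the caller if it already yielded; if both sides
         * completed synchronously it never went to sleep. */
        qemu_coroutine_enter_if_inactive(r->caller);
    }

    static int coroutine_fn run_both(void)
    {
        PairRequest r = { .caller = qemu_coroutine_self() };

        qemu_coroutine_enter(qemu_coroutine_create(one_side, &r));
        qemu_coroutine_enter(qemu_coroutine_create(one_side, &r));

        while (r.done < 2) {
            qemu_coroutine_yield();
        }
        return 0;
    }
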
diff --git a/block/raw-posix.c b/block/file-posix.c
index 28b47d977b..28b47d977b 100644
--- a/block/raw-posix.c
+++ b/block/file-posix.c
diff --git a/block/raw-win32.c b/block/file-win32.c
index 800fabdd72..800fabdd72 100644
--- a/block/raw-win32.c
+++ b/block/file-win32.c
diff --git a/block/gluster.c b/block/gluster.c
index a0a74e49fd..1a22f2982d 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -1253,7 +1253,7 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs)
* If @start is in a trailing hole or beyond EOF, return -ENXIO.
* If we can't find out, return a negative errno other than -ENXIO.
*
- * (Shamefully copied from raw-posix.c, only miniscule adaptions.)
+ * (Shamefully copied from file-posix.c, only miniscule adaptions.)
*/
static int find_allocation(BlockDriverState *bs, off_t start,
off_t *data, off_t *hole)
@@ -1349,7 +1349,7 @@ exit:
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped.
*
- * (Based on raw_co_get_block_status() from raw-posix.c.)
+ * (Based on raw_co_get_block_status() from file-posix.c.)
*/
static int64_t coroutine_fn qemu_gluster_co_get_block_status(
BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
diff --git a/block/quorum.c b/block/quorum.c
index d122299352..86e2072dce 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -97,7 +97,7 @@ typedef struct QuorumAIOCB QuorumAIOCB;
* $children_count QuorumChildRequest.
*/
typedef struct QuorumChildRequest {
- BlockAIOCB *aiocb;
+ BlockDriverState *bs;
QEMUIOVector qiov;
uint8_t *buf;
int ret;
@@ -110,11 +110,12 @@ typedef struct QuorumChildRequest {
* used to do operations on each children and track overall progress.
*/
struct QuorumAIOCB {
- BlockAIOCB common;
+ BlockDriverState *bs;
+ Coroutine *co;
/* Request metadata */
- uint64_t sector_num;
- int nb_sectors;
+ uint64_t offset;
+ uint64_t bytes;
QEMUIOVector *qiov; /* calling IOV */
@@ -133,32 +134,15 @@ struct QuorumAIOCB {
int children_read; /* how many children have been read from */
};
-static bool quorum_vote(QuorumAIOCB *acb);
-
-static void quorum_aio_cancel(BlockAIOCB *blockacb)
-{
- QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
- BDRVQuorumState *s = acb->common.bs->opaque;
- int i;
-
- /* cancel all callbacks */
- for (i = 0; i < s->num_children; i++) {
- if (acb->qcrs[i].aiocb) {
- bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
- }
- }
-}
-
-static AIOCBInfo quorum_aiocb_info = {
- .aiocb_size = sizeof(QuorumAIOCB),
- .cancel_async = quorum_aio_cancel,
-};
+typedef struct QuorumCo {
+ QuorumAIOCB *acb;
+ int idx;
+} QuorumCo;
static void quorum_aio_finalize(QuorumAIOCB *acb)
{
- acb->common.cb(acb->common.opaque, acb->vote_ret);
g_free(acb->qcrs);
- qemu_aio_unref(acb);
+ g_free(acb);
}
static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
@@ -171,30 +155,26 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
return a->l == b->l;
}
-static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
- BlockDriverState *bs,
+static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
QEMUIOVector *qiov,
- uint64_t sector_num,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
+ uint64_t offset,
+ uint64_t bytes)
{
- QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
+ BDRVQuorumState *s = bs->opaque;
+ QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
int i;
- acb->common.bs->opaque = s;
- acb->sector_num = sector_num;
- acb->nb_sectors = nb_sectors;
- acb->qiov = qiov;
- acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
- acb->count = 0;
- acb->success_count = 0;
- acb->rewrite_count = 0;
- acb->votes.compare = quorum_sha256_compare;
- QLIST_INIT(&acb->votes.vote_list);
- acb->is_read = false;
- acb->vote_ret = 0;
+ *acb = (QuorumAIOCB) {
+ .co = qemu_coroutine_self(),
+ .bs = bs,
+ .offset = offset,
+ .bytes = bytes,
+ .qiov = qiov,
+ .votes.compare = quorum_sha256_compare,
+ .votes.vote_list = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
+ };
+ acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
for (i = 0; i < s->num_children; i++) {
acb->qcrs[i].buf = NULL;
acb->qcrs[i].ret = 0;
@@ -204,30 +184,37 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
return acb;
}
-static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
- int nb_sectors, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t offset,
+ uint64_t bytes, char *node_name, int ret)
{
const char *msg = NULL;
+ int64_t start_sector = offset / BDRV_SECTOR_SIZE;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+
if (ret < 0) {
msg = strerror(-ret);
}
- qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
- sector_num, nb_sectors, &error_abort);
+ qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector,
+ end_sector - start_sector, &error_abort);
}
static void quorum_report_failure(QuorumAIOCB *acb)
{
- const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
- qapi_event_send_quorum_failure(reference, acb->sector_num,
- acb->nb_sectors, &error_abort);
+ const char *reference = bdrv_get_device_or_node_name(acb->bs);
+ int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE;
+ int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes,
+ BDRV_SECTOR_SIZE);
+
+ qapi_event_send_quorum_failure(reference, start_sector,
+ end_sector - start_sector, &error_abort);
}
static int quorum_vote_error(QuorumAIOCB *acb);
static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
{
- BDRVQuorumState *s = acb->common.bs->opaque;
+ BDRVQuorumState *s = acb->bs->opaque;
if (acb->success_count < s->threshold) {
acb->vote_ret = quorum_vote_error(acb);
@@ -238,22 +225,7 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
return false;
}
-static void quorum_rewrite_aio_cb(void *opaque, int ret)
-{
- QuorumAIOCB *acb = opaque;
-
- /* one less rewrite to do */
- acb->rewrite_count--;
-
- /* wait until all rewrite callbacks have completed */
- if (acb->rewrite_count) {
- return;
- }
-
- quorum_aio_finalize(acb);
-}
-
-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb);
+static int read_fifo_child(QuorumAIOCB *acb);
static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
{
@@ -272,70 +244,7 @@ static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret)
{
QuorumAIOCB *acb = sacb->parent;
QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
- quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
- sacb->aiocb->bs->node_name, ret);
-}
-
-static void quorum_fifo_aio_cb(void *opaque, int ret)
-{
- QuorumChildRequest *sacb = opaque;
- QuorumAIOCB *acb = sacb->parent;
- BDRVQuorumState *s = acb->common.bs->opaque;
-
- assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO);
-
- if (ret < 0) {
- quorum_report_bad_acb(sacb, ret);
-
- /* We try to read next child in FIFO order if we fail to read */
- if (acb->children_read < s->num_children) {
- read_fifo_child(acb);
- return;
- }
- }
-
- acb->vote_ret = ret;
-
- /* FIXME: rewrite failed children if acb->children_read > 1? */
- quorum_aio_finalize(acb);
-}
-
-static void quorum_aio_cb(void *opaque, int ret)
-{
- QuorumChildRequest *sacb = opaque;
- QuorumAIOCB *acb = sacb->parent;
- BDRVQuorumState *s = acb->common.bs->opaque;
- bool rewrite = false;
- int i;
-
- sacb->ret = ret;
- if (ret == 0) {
- acb->success_count++;
- } else {
- quorum_report_bad_acb(sacb, ret);
- }
- acb->count++;
- assert(acb->count <= s->num_children);
- assert(acb->success_count <= s->num_children);
- if (acb->count < s->num_children) {
- return;
- }
-
- /* Do the vote on read */
- if (acb->is_read) {
- rewrite = quorum_vote(acb);
- for (i = 0; i < s->num_children; i++) {
- qemu_vfree(acb->qcrs[i].buf);
- qemu_iovec_destroy(&acb->qcrs[i].qiov);
- }
- } else {
- quorum_has_too_much_io_failed(acb);
- }
-
- /* if no rewrite is done the code will finish right away */
- if (!rewrite) {
- quorum_aio_finalize(acb);
- }
+ quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret);
}
static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -350,14 +259,31 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
- acb->nb_sectors,
+ quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes,
s->children[item->index]->bs->node_name, 0);
}
}
}
-static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+static void quorum_rewrite_entry(void *opaque)
+{
+ QuorumCo *co = opaque;
+ QuorumAIOCB *acb = co->acb;
+ BDRVQuorumState *s = acb->bs->opaque;
+
+ /* Ignore any errors, it's just a correction attempt for already
+ * corrupted data. */
+ bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
+ acb->qiov, 0);
+
+ /* Wake up the caller after the last rewrite */
+ acb->rewrite_count--;
+ if (!acb->rewrite_count) {
+ qemu_coroutine_enter_if_inactive(acb->co);
+ }
+}
+
+static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
QuorumVoteValue *value)
{
QuorumVoteVersion *version;
@@ -376,7 +302,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
}
}
- /* quorum_rewrite_aio_cb will count down this to zero */
+ /* quorum_rewrite_entry will count down this to zero */
acb->rewrite_count = count;
/* now fire the correcting rewrites */
@@ -385,9 +311,14 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- bdrv_aio_writev(s->children[item->index], acb->sector_num,
- acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
- acb);
+ Coroutine *co;
+ QuorumCo data = {
+ .acb = acb,
+ .idx = item->index,
+ };
+
+ co = qemu_coroutine_create(quorum_rewrite_entry, &data);
+ qemu_coroutine_enter(co);
}
}
@@ -507,8 +438,8 @@ static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
va_list ap;
va_start(ap, fmt);
- fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
- acb->sector_num, acb->nb_sectors);
+ fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ",
+ acb->offset, acb->bytes);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
va_end(ap);
@@ -519,16 +450,15 @@ static bool quorum_compare(QuorumAIOCB *acb,
QEMUIOVector *a,
QEMUIOVector *b)
{
- BDRVQuorumState *s = acb->common.bs->opaque;
+ BDRVQuorumState *s = acb->bs->opaque;
ssize_t offset;
/* This driver will replace blkverify in this particular case */
if (s->is_blkverify) {
offset = qemu_iovec_compare(a, b);
if (offset != -1) {
- quorum_err(acb, "contents mismatch in sector %" PRId64,
- acb->sector_num +
- (uint64_t)(offset / BDRV_SECTOR_SIZE));
+ quorum_err(acb, "contents mismatch at offset %" PRIu64,
+ acb->offset + offset);
}
return true;
}
@@ -539,7 +469,7 @@ static bool quorum_compare(QuorumAIOCB *acb,
/* Do a vote to get the error code */
static int quorum_vote_error(QuorumAIOCB *acb)
{
- BDRVQuorumState *s = acb->common.bs->opaque;
+ BDRVQuorumState *s = acb->bs->opaque;
QuorumVoteVersion *winner = NULL;
QuorumVotes error_votes;
QuorumVoteValue result_value;
@@ -568,17 +498,16 @@ static int quorum_vote_error(QuorumAIOCB *acb)
return ret;
}
-static bool quorum_vote(QuorumAIOCB *acb)
+static void quorum_vote(QuorumAIOCB *acb)
{
bool quorum = true;
- bool rewrite = false;
int i, j, ret;
QuorumVoteValue hash;
- BDRVQuorumState *s = acb->common.bs->opaque;
+ BDRVQuorumState *s = acb->bs->opaque;
QuorumVoteVersion *winner;
if (quorum_has_too_much_io_failed(acb)) {
- return false;
+ return;
}
/* get the index of the first successful read */
@@ -606,7 +535,7 @@ static bool quorum_vote(QuorumAIOCB *acb)
/* Every successful read agrees */
if (quorum) {
quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
- return false;
+ return;
}
/* compute hashes for each successful read, also store indexes */
@@ -641,19 +570,46 @@ static bool quorum_vote(QuorumAIOCB *acb)
/* corruption correction is enabled */
if (s->rewrite_corrupted) {
- rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+ quorum_rewrite_bad_versions(acb, &winner->value);
}
free_exit:
/* free lists */
quorum_free_vote_list(&acb->votes);
- return rewrite;
}
-static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
+static void read_quorum_children_entry(void *opaque)
{
- BDRVQuorumState *s = acb->common.bs->opaque;
- int i;
+ QuorumCo *co = opaque;
+ QuorumAIOCB *acb = co->acb;
+ BDRVQuorumState *s = acb->bs->opaque;
+ int i = co->idx;
+ QuorumChildRequest *sacb = &acb->qcrs[i];
+
+ sacb->bs = s->children[i]->bs;
+ sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes,
+ &acb->qcrs[i].qiov, 0);
+
+ if (sacb->ret == 0) {
+ acb->success_count++;
+ } else {
+ quorum_report_bad_acb(sacb, sacb->ret);
+ }
+
+ acb->count++;
+ assert(acb->count <= s->num_children);
+ assert(acb->success_count <= s->num_children);
+
+ /* Wake up the caller after the last read */
+ if (acb->count == s->num_children) {
+ qemu_coroutine_enter_if_inactive(acb->co);
+ }
+}
+
+static int read_quorum_children(QuorumAIOCB *acb)
+{
+ BDRVQuorumState *s = acb->bs->opaque;
+ int i, ret;
acb->children_read = s->num_children;
for (i = 0; i < s->num_children; i++) {
@@ -663,65 +619,131 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
}
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
- &acb->qcrs[i].qiov, acb->nb_sectors,
- quorum_aio_cb, &acb->qcrs[i]);
+ Coroutine *co;
+ QuorumCo data = {
+ .acb = acb,
+ .idx = i,
+ };
+
+ co = qemu_coroutine_create(read_quorum_children_entry, &data);
+ qemu_coroutine_enter(co);
}
- return &acb->common;
+ while (acb->count < s->num_children) {
+ qemu_coroutine_yield();
+ }
+
+ /* Do the vote on read */
+ quorum_vote(acb);
+ for (i = 0; i < s->num_children; i++) {
+ qemu_vfree(acb->qcrs[i].buf);
+ qemu_iovec_destroy(&acb->qcrs[i].qiov);
+ }
+
+ while (acb->rewrite_count) {
+ qemu_coroutine_yield();
+ }
+
+ ret = acb->vote_ret;
+
+ return ret;
}
-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
+static int read_fifo_child(QuorumAIOCB *acb)
{
- BDRVQuorumState *s = acb->common.bs->opaque;
- int n = acb->children_read++;
+ BDRVQuorumState *s = acb->bs->opaque;
+ int n, ret;
+
+ /* We try to read the next child in FIFO order if we failed to read */
+ do {
+ n = acb->children_read++;
+ acb->qcrs[n].bs = s->children[n]->bs;
+ ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes,
+ acb->qiov, 0);
+ if (ret < 0) {
+ quorum_report_bad_acb(&acb->qcrs[n], ret);
+ }
+ } while (ret < 0 && acb->children_read < s->num_children);
- acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num,
- acb->qiov, acb->nb_sectors,
- quorum_fifo_aio_cb, &acb->qcrs[n]);
+ /* FIXME: rewrite failed children if acb->children_read > 1? */
- return &acb->common;
+ return ret;
}
-static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
+static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
- QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
- nb_sectors, cb, opaque);
+ QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+ int ret;
+
acb->is_read = true;
acb->children_read = 0;
if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
- return read_quorum_children(acb);
+ ret = read_quorum_children(acb);
+ } else {
+ ret = read_fifo_child(acb);
+ }
+ quorum_aio_finalize(acb);
+
+ return ret;
+}
+
+static void write_quorum_entry(void *opaque)
+{
+ QuorumCo *co = opaque;
+ QuorumAIOCB *acb = co->acb;
+ BDRVQuorumState *s = acb->bs->opaque;
+ int i = co->idx;
+ QuorumChildRequest *sacb = &acb->qcrs[i];
+
+ sacb->bs = s->children[i]->bs;
+ sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
+ acb->qiov, 0);
+ if (sacb->ret == 0) {
+ acb->success_count++;
+ } else {
+ quorum_report_bad_acb(sacb, sacb->ret);
}
+ acb->count++;
+ assert(acb->count <= s->num_children);
+ assert(acb->success_count <= s->num_children);
- return read_fifo_child(acb);
+ /* Wake up the caller after the last write */
+ if (acb->count == s->num_children) {
+ qemu_coroutine_enter_if_inactive(acb->co);
+ }
}
-static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
+static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
- QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
- cb, opaque);
- int i;
+ QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+ int i, ret;
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
- qiov, nb_sectors, &quorum_aio_cb,
- &acb->qcrs[i]);
+ Coroutine *co;
+ QuorumCo data = {
+ .acb = acb,
+ .idx = i,
+ };
+
+ co = qemu_coroutine_create(write_quorum_entry, &data);
+ qemu_coroutine_enter(co);
+ }
+
+ while (acb->count < s->num_children) {
+ qemu_coroutine_yield();
}
- return &acb->common;
+ quorum_has_too_much_io_failed(acb);
+
+ ret = acb->vote_ret;
+ quorum_aio_finalize(acb);
+
+ return ret;
}
static int64_t quorum_getlength(BlockDriverState *bs)
@@ -765,7 +787,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
result = bdrv_co_flush(s->children[i]->bs);
if (result) {
quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
- bdrv_nb_sectors(s->children[i]->bs),
+ bdrv_getlength(s->children[i]->bs),
s->children[i]->bs->node_name, result);
result_value.l = result;
quorum_count_vote(&error_votes, &result_value, i);
@@ -1098,8 +1120,8 @@ static BlockDriver bdrv_quorum = {
.bdrv_getlength = quorum_getlength,
- .bdrv_aio_readv = quorum_aio_readv,
- .bdrv_aio_writev = quorum_aio_writev,
+ .bdrv_co_preadv = quorum_co_preadv,
+ .bdrv_co_pwritev = quorum_co_pwritev,
.bdrv_add_child = quorum_add_child,
.bdrv_del_child = quorum_del_child,
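
One detail worth noting in read_quorum_children() and quorum_co_pwritev() above: the per-child QuorumCo argument lives on the parent's stack inside the loop. That is safe because qemu_coroutine_enter() runs the child synchronously up to its first yield, and the child copies acb and idx out before any yield point. A schematic of the same fan-out/fan-in shape under that assumption (all names here are illustrative, and do_one_request() stands in for the real per-child I/O):

    typedef struct {
        Coroutine *caller;
        int pending;
    } FanOut;

    typedef struct {
        FanOut *f;
        int idx;
    } WorkerArg;

    /* Hypothetical per-child I/O; may yield. */
    static void coroutine_fn do_one_request(FanOut *f, int idx);

    static void coroutine_fn worker(void *opaque)
    {
        WorkerArg *a = opaque;
        FanOut *f = a->f;      /* copied out before the first yield; the */
        int idx = a->idx;      /* caller's stack slot is gone after that */

        do_one_request(f, idx);

        if (--f->pending == 0) {
            qemu_coroutine_enter_if_inactive(f->caller);
        }
    }

    static void coroutine_fn fan_out(FanOut *f, int n)
    {
        int i;

        f->caller = qemu_coroutine_self();
        f->pending = n;

        for (i = 0; i < n; i++) {
            WorkerArg a = { .f = f, .idx = i };
            /* worker() runs synchronously up to its first yield, which is
             * the only window in which it may read the stack-allocated a. */
            qemu_coroutine_enter(qemu_coroutine_create(worker, &a));
        }

        while (f->pending) {
            qemu_coroutine_yield();
        }
    }
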
diff --git a/block/raw_bsd.c b/block/raw-format.c
index 8a5b9b0424..8404a82e0c 100644
--- a/block/raw_bsd.c
+++ b/block/raw-format.c
@@ -1,4 +1,4 @@
-/* BlockDriver implementation for "raw"
+/* BlockDriver implementation for "raw" format driver
*
* Copyright (C) 2010-2016 Red Hat, Inc.
* Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
diff --git a/block/trace-events b/block/trace-events
index cfc05f2478..671a6a851c 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -53,8 +53,8 @@ qmp_block_job_resume(void *job) "job %p"
qmp_block_job_complete(void *job) "job %p"
qmp_block_stream(void *bs, void *job) "bs %p job %p"
-# block/raw-win32.c
-# block/raw-posix.c
+# block/file-win32.c
+# block/file-posix.c
paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
diff --git a/configure b/configure
index 218df87d21..86f5214dd0 100755
--- a/configure
+++ b/configure
@@ -2750,7 +2750,7 @@ if compile_prog "" "" ; then
fi
##########################################
-# xfsctl() probe, used for raw-posix
+# xfsctl() probe, used for file-posix.c
if test "$xfs" != "no" ; then
cat > $TMPC << EOF
#include <stddef.h> /* NULL */
diff --git a/contrib/libvhost-user/Makefile.objs b/contrib/libvhost-user/Makefile.objs
new file mode 100644
index 0000000000..cef1ad6e31
--- /dev/null
+++ b/contrib/libvhost-user/Makefile.objs
@@ -0,0 +1 @@
+libvhost-user-obj-y = libvhost-user.o
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
new file mode 100644
index 0000000000..af4faad60b
--- /dev/null
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -0,0 +1,1499 @@
+/*
+ * Vhost User library
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Marc-André Lureau <mlureau@redhat.com>
+ * Victor Kaplansky <victork@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include <qemu/osdep.h>
+#include <sys/eventfd.h>
+#include <linux/vhost.h>
+
+#include "qemu/atomic.h"
+
+#include "libvhost-user.h"
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION 1
+#define LIBVHOST_USER_DEBUG 0
+
+#define DPRINT(...) \
+ do { \
+ if (LIBVHOST_USER_DEBUG) { \
+ fprintf(stderr, __VA_ARGS__); \
+ } \
+ } while (0)
+
+static const char *
+vu_request_to_string(int req)
+{
+#define REQ(req) [req] = #req
+ static const char *vu_request_str[] = {
+ REQ(VHOST_USER_NONE),
+ REQ(VHOST_USER_GET_FEATURES),
+ REQ(VHOST_USER_SET_FEATURES),
+ REQ(VHOST_USER_SET_OWNER),
+ REQ(VHOST_USER_RESET_OWNER),
+ REQ(VHOST_USER_SET_MEM_TABLE),
+ REQ(VHOST_USER_SET_LOG_BASE),
+ REQ(VHOST_USER_SET_LOG_FD),
+ REQ(VHOST_USER_SET_VRING_NUM),
+ REQ(VHOST_USER_SET_VRING_ADDR),
+ REQ(VHOST_USER_SET_VRING_BASE),
+ REQ(VHOST_USER_GET_VRING_BASE),
+ REQ(VHOST_USER_SET_VRING_KICK),
+ REQ(VHOST_USER_SET_VRING_CALL),
+ REQ(VHOST_USER_SET_VRING_ERR),
+ REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
+ REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
+ REQ(VHOST_USER_GET_QUEUE_NUM),
+ REQ(VHOST_USER_SET_VRING_ENABLE),
+ REQ(VHOST_USER_SEND_RARP),
+ REQ(VHOST_USER_INPUT_GET_CONFIG),
+ REQ(VHOST_USER_MAX),
+ };
+#undef REQ
+
+ if (req < VHOST_USER_MAX) {
+ return vu_request_str[req];
+ } else {
+ return "unknown";
+ }
+}
+
+static void
+vu_panic(VuDev *dev, const char *msg, ...)
+{
+ char *buf = NULL;
+ va_list ap;
+
+ va_start(ap, msg);
+ (void)vasprintf(&buf, msg, ap);
+ va_end(ap);
+
+ dev->broken = true;
+ dev->panic(dev, buf);
+ free(buf);
+
+ /* FIXME: find a way to call virtio_error? */
+}
+
+/* Translate guest physical address to our virtual address. */
+void *
+vu_gpa_to_va(VuDev *dev, uint64_t guest_addr)
+{
+ int i;
+
+ /* Find matching memory region. */
+ for (i = 0; i < dev->nregions; i++) {
+ VuDevRegion *r = &dev->regions[i];
+
+ if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
+ return (void *)(uintptr_t)
+ guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
+ }
+ }
+
+ return NULL;
+}
+
+/* Translate qemu virtual address to our virtual address. */
+static void *
+qva_to_va(VuDev *dev, uint64_t qemu_addr)
+{
+ int i;
+
+ /* Find matching memory region. */
+ for (i = 0; i < dev->nregions; i++) {
+ VuDevRegion *r = &dev->regions[i];
+
+ if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
+ return (void *)(uintptr_t)
+ qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+vmsg_close_fds(VhostUserMsg *vmsg)
+{
+ int i;
+
+ for (i = 0; i < vmsg->fd_num; i++) {
+ close(vmsg->fds[i]);
+ }
+}
+
+static bool
+vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
+{
+ char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
+ struct iovec iov = {
+ .iov_base = (char *)vmsg,
+ .iov_len = VHOST_USER_HDR_SIZE,
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = control,
+ .msg_controllen = sizeof(control),
+ };
+ size_t fd_size;
+ struct cmsghdr *cmsg;
+ int rc;
+
+ do {
+ rc = recvmsg(conn_fd, &msg, 0);
+ } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (rc <= 0) {
+ vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
+ return false;
+ }
+
+ vmsg->fd_num = 0;
+ for (cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msg, cmsg))
+ {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+ fd_size = cmsg->cmsg_len - CMSG_LEN(0);
+ vmsg->fd_num = fd_size / sizeof(int);
+ memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
+ break;
+ }
+ }
+
+ if (vmsg->size > sizeof(vmsg->payload)) {
+ vu_panic(dev,
+ "Error: too big message request: %d, size: vmsg->size: %u, "
+ "while sizeof(vmsg->payload) = %zu\n",
+ vmsg->request, vmsg->size, sizeof(vmsg->payload));
+ goto fail;
+ }
+
+ if (vmsg->size) {
+ do {
+ rc = read(conn_fd, &vmsg->payload, vmsg->size);
+ } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (rc <= 0) {
+ vu_panic(dev, "Error while reading: %s", strerror(errno));
+ goto fail;
+ }
+
+ assert(rc == vmsg->size);
+ }
+
+ return true;
+
+fail:
+ vmsg_close_fds(vmsg);
+
+ return false;
+}
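
vu_message_read() pulls the fixed-size header and any passed file descriptors out of a single recvmsg() call; the descriptors ride in an SCM_RIGHTS control message alongside the payload. A self-contained sketch of that receive path in plain POSIX C, with no libvhost-user types (recv_with_fds is my name, not part of the library):

    #include <string.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    #define MAX_FDS 8

    static ssize_t recv_with_fds(int sock, void *buf, size_t len,
                                 int *fds, size_t *nfds)
    {
        char control[CMSG_SPACE(MAX_FDS * sizeof(int))] = { 0 };
        struct iovec iov = { .iov_base = buf, .iov_len = len };
        struct msghdr msg = {
            .msg_iov = &iov, .msg_iovlen = 1,
            .msg_control = control, .msg_controllen = sizeof(control),
        };
        ssize_t rc = recvmsg(sock, &msg, 0);
        struct cmsghdr *cmsg;

        *nfds = 0;
        for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
            if (cmsg->cmsg_level == SOL_SOCKET &&
                cmsg->cmsg_type == SCM_RIGHTS) {
                size_t sz = cmsg->cmsg_len - CMSG_LEN(0);
                *nfds = sz / sizeof(int);     /* fds arrive packed as ints */
                memcpy(fds, CMSG_DATA(cmsg), sz);
                break;
            }
        }
        return rc;
    }
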
+
+static bool
+vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
+{
+ int rc;
+ uint8_t *p = (uint8_t *)vmsg;
+
+ /* Set the version in the flags when sending the reply */
+ vmsg->flags &= ~VHOST_USER_VERSION_MASK;
+ vmsg->flags |= VHOST_USER_VERSION;
+ vmsg->flags |= VHOST_USER_REPLY_MASK;
+
+ do {
+ rc = write(conn_fd, p, VHOST_USER_HDR_SIZE);
+ } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+ do {
+ if (vmsg->data) {
+ rc = write(conn_fd, vmsg->data, vmsg->size);
+ } else {
+ rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
+ }
+ } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (rc <= 0) {
+ vu_panic(dev, "Error while writing: %s", strerror(errno));
+ return false;
+ }
+
+ return true;
+}
+
+/* Kick the log_call_fd if required. */
+static void
+vu_log_kick(VuDev *dev)
+{
+ if (dev->log_call_fd != -1) {
+ DPRINT("Kicking the QEMU's log...\n");
+ if (eventfd_write(dev->log_call_fd, 1) < 0) {
+ vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
+ }
+ }
+}
+
+static void
+vu_log_page(uint8_t *log_table, uint64_t page)
+{
+ DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+ atomic_or(&log_table[page / 8], 1 << (page % 8));
+}
+
+static void
+vu_log_write(VuDev *dev, uint64_t address, uint64_t length)
+{
+ uint64_t page;
+
+ if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
+ !dev->log_table || !length) {
+ return;
+ }
+
+ assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
+
+ page = address / VHOST_LOG_PAGE;
+ while (page * VHOST_LOG_PAGE < address + length) {
+ vu_log_page(dev->log_table, page);
+ page += VHOST_LOG_PAGE;
+ }
+
+ vu_log_kick(dev);
+}
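
vu_log_write() translates a guest-physical range into bits in the shared dirty log: one bit per VHOST_LOG_PAGE-sized page, set atomically because QEMU scans the log concurrently. A standalone sketch of the same arithmetic, advancing one page (one bit) per iteration (LOG_PAGE and the function name are assumptions for illustration):

    #include <stdint.h>

    #define LOG_PAGE 0x1000    /* assumed page granularity */

    static void log_dirty_range(uint8_t *log, uint64_t addr, uint64_t len)
    {
        uint64_t page, last;

        if (!len) {
            return;
        }
        page = addr / LOG_PAGE;
        last = (addr + len - 1) / LOG_PAGE;

        for (; page <= last; page++) {
            /* One bit per page; atomic so the peer can read while we write. */
            __atomic_fetch_or(&log[page / 8], 1u << (page % 8),
                              __ATOMIC_SEQ_CST);
        }
    }
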
+
+static void
+vu_kick_cb(VuDev *dev, int condition, void *data)
+{
+ int index = (intptr_t)data;
+ VuVirtq *vq = &dev->vq[index];
+ int sock = vq->kick_fd;
+ eventfd_t kick_data;
+ ssize_t rc;
+
+ rc = eventfd_read(sock, &kick_data);
+ if (rc == -1) {
+ vu_panic(dev, "kick eventfd_read(): %s", strerror(errno));
+ dev->remove_watch(dev, dev->vq[index].kick_fd);
+ } else {
+ DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n",
+ kick_data, vq->handler, index);
+ if (vq->handler) {
+ vq->handler(dev, index);
+ }
+ }
+}
+
+static bool
+vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ vmsg->payload.u64 =
+ 1ULL << VHOST_F_LOG_ALL |
+ 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
+
+ if (dev->iface->get_features) {
+ vmsg->payload.u64 |= dev->iface->get_features(dev);
+ }
+
+ vmsg->size = sizeof(vmsg->payload.u64);
+
+ DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ return true;
+}
+
+static void
+vu_set_enable_all_rings(VuDev *dev, bool enabled)
+{
+ int i;
+
+ for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+ dev->vq[i].enable = enabled;
+ }
+}
+
+static bool
+vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ dev->features = vmsg->payload.u64;
+
+ if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) {
+ vu_set_enable_all_rings(dev, true);
+ }
+
+ if (dev->iface->set_features) {
+ dev->iface->set_features(dev, dev->features);
+ }
+
+ return false;
+}
+
+static bool
+vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ return false;
+}
+
+static void
+vu_close_log(VuDev *dev)
+{
+ if (dev->log_table) {
+ if (munmap(dev->log_table, dev->log_size) != 0) {
+ perror("close log munmap() error");
+ }
+
+ dev->log_table = NULL;
+ }
+ if (dev->log_call_fd != -1) {
+ close(dev->log_call_fd);
+ dev->log_call_fd = -1;
+ }
+}
+
+static bool
+vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ vu_set_enable_all_rings(dev, false);
+
+ return false;
+}
+
+static bool
+vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int i;
+ VhostUserMemory *memory = &vmsg->payload.memory;
+ dev->nregions = memory->nregions;
+
+ DPRINT("Nregions: %d\n", memory->nregions);
+ for (i = 0; i < dev->nregions; i++) {
+ void *mmap_addr;
+ VhostUserMemoryRegion *msg_region = &memory->regions[i];
+ VuDevRegion *dev_region = &dev->regions[i];
+
+ DPRINT("Region %d\n", i);
+ DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
+ msg_region->guest_phys_addr);
+ DPRINT(" memory_size: 0x%016"PRIx64"\n",
+ msg_region->memory_size);
+ DPRINT(" userspace_addr 0x%016"PRIx64"\n",
+ msg_region->userspace_addr);
+ DPRINT(" mmap_offset 0x%016"PRIx64"\n",
+ msg_region->mmap_offset);
+
+ dev_region->gpa = msg_region->guest_phys_addr;
+ dev_region->size = msg_region->memory_size;
+ dev_region->qva = msg_region->userspace_addr;
+ dev_region->mmap_offset = msg_region->mmap_offset;
+
+ /* We don't use offset argument of mmap() since the
+ * mapped address has to be page aligned, and we use huge
+ * pages. */
+ mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ vmsg->fds[i], 0);
+
+ if (mmap_addr == MAP_FAILED) {
+ vu_panic(dev, "region mmap error: %s", strerror(errno));
+ } else {
+ dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
+ DPRINT(" mmap_addr: 0x%016"PRIx64"\n",
+ dev_region->mmap_addr);
+ }
+
+ close(vmsg->fds[i]);
+ }
+
+ return false;
+}
+
+static bool
+vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int fd;
+ uint64_t log_mmap_size, log_mmap_offset;
+ void *rc;
+
+ if (vmsg->fd_num != 1 ||
+ vmsg->size != sizeof(vmsg->payload.log)) {
+ vu_panic(dev, "Invalid log_base message");
+ return true;
+ }
+
+ fd = vmsg->fds[0];
+ log_mmap_offset = vmsg->payload.log.mmap_offset;
+ log_mmap_size = vmsg->payload.log.mmap_size;
+ DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
+ DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
+
+ rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+ log_mmap_offset);
+ if (rc == MAP_FAILED) {
+ perror("log mmap error");
+ }
+ dev->log_table = rc;
+ dev->log_size = log_mmap_size;
+
+ vmsg->size = sizeof(vmsg->payload.u64);
+
+ return true;
+}
+
+static bool
+vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ if (vmsg->fd_num != 1) {
+ vu_panic(dev, "Invalid log_fd message");
+ return false;
+ }
+
+ if (dev->log_call_fd != -1) {
+ close(dev->log_call_fd);
+ }
+ dev->log_call_fd = vmsg->fds[0];
+ DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
+
+ return false;
+}
+
+static bool
+vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ unsigned int index = vmsg->payload.state.index;
+ unsigned int num = vmsg->payload.state.num;
+
+ DPRINT("State.index: %d\n", index);
+ DPRINT("State.num: %d\n", num);
+ dev->vq[index].vring.num = num;
+
+ return false;
+}
+
+static bool
+vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ struct vhost_vring_addr *vra = &vmsg->payload.addr;
+ unsigned int index = vra->index;
+ VuVirtq *vq = &dev->vq[index];
+
+ DPRINT("vhost_vring_addr:\n");
+ DPRINT(" index: %d\n", vra->index);
+ DPRINT(" flags: %d\n", vra->flags);
+ DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr);
+ DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr);
+ DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr);
+ DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr);
+
+ vq->vring.flags = vra->flags;
+ vq->vring.desc = qva_to_va(dev, vra->desc_user_addr);
+ vq->vring.used = qva_to_va(dev, vra->used_user_addr);
+ vq->vring.avail = qva_to_va(dev, vra->avail_user_addr);
+ vq->vring.log_guest_addr = vra->log_guest_addr;
+
+ DPRINT("Setting virtq addresses:\n");
+ DPRINT(" vring_desc at %p\n", vq->vring.desc);
+ DPRINT(" vring_used at %p\n", vq->vring.used);
+ DPRINT(" vring_avail at %p\n", vq->vring.avail);
+
+ if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) {
+ vu_panic(dev, "Invalid vring_addr message");
+ return false;
+ }
+
+ vq->used_idx = vq->vring.used->idx;
+
+ return false;
+}
+
+static bool
+vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ unsigned int index = vmsg->payload.state.index;
+ unsigned int num = vmsg->payload.state.num;
+
+ DPRINT("State.index: %d\n", index);
+ DPRINT("State.num: %d\n", num);
+ dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num;
+
+ return false;
+}
+
+static bool
+vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ unsigned int index = vmsg->payload.state.index;
+
+ DPRINT("State.index: %d\n", index);
+ vmsg->payload.state.num = dev->vq[index].last_avail_idx;
+ vmsg->size = sizeof(vmsg->payload.state);
+
+ dev->vq[index].started = false;
+ if (dev->iface->queue_set_started) {
+ dev->iface->queue_set_started(dev, index, false);
+ }
+
+ if (dev->vq[index].call_fd != -1) {
+ close(dev->vq[index].call_fd);
+ dev->vq[index].call_fd = -1;
+ }
+ if (dev->vq[index].kick_fd != -1) {
+ dev->remove_watch(dev, dev->vq[index].kick_fd);
+ close(dev->vq[index].kick_fd);
+ dev->vq[index].kick_fd = -1;
+ }
+
+ return true;
+}
+
+static bool
+vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+ if (index >= VHOST_MAX_NR_VIRTQUEUE) {
+ vmsg_close_fds(vmsg);
+ vu_panic(dev, "Invalid queue index: %u", index);
+ return false;
+ }
+
+ if (vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK ||
+ vmsg->fd_num != 1) {
+ vmsg_close_fds(vmsg);
+ vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ if (!vu_check_queue_msg_file(dev, vmsg)) {
+ return false;
+ }
+
+ if (dev->vq[index].kick_fd != -1) {
+ dev->remove_watch(dev, dev->vq[index].kick_fd);
+ close(dev->vq[index].kick_fd);
+ dev->vq[index].kick_fd = -1;
+ }
+
+ if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+ dev->vq[index].kick_fd = vmsg->fds[0];
+ DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
+ }
+
+ dev->vq[index].started = true;
+ if (dev->iface->queue_set_started) {
+ dev->iface->queue_set_started(dev, index, true);
+ }
+
+ if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) {
+ dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN,
+ vu_kick_cb, (void *)(long)index);
+
+ DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
+ dev->vq[index].kick_fd, index);
+ }
+
+ return false;
+}
+
+void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
+ vu_queue_handler_cb handler)
+{
+ int qidx = vq - dev->vq;
+
+ vq->handler = handler;
+ if (vq->kick_fd >= 0) {
+ if (handler) {
+ dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN,
+ vu_kick_cb, (void *)(long)qidx);
+ } else {
+ dev->remove_watch(dev, vq->kick_fd);
+ }
+ }
+}
+
+static bool
+vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ if (!vu_check_queue_msg_file(dev, vmsg)) {
+ return false;
+ }
+
+ if (dev->vq[index].call_fd != -1) {
+ close(dev->vq[index].call_fd);
+ dev->vq[index].call_fd = -1;
+ }
+
+ if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+ dev->vq[index].call_fd = vmsg->fds[0];
+ }
+
+ DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
+
+ return false;
+}
+
+static bool
+vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ if (!vu_check_queue_msg_file(dev, vmsg)) {
+ return false;
+ }
+
+ if (dev->vq[index].err_fd != -1) {
+ close(dev->vq[index].err_fd);
+ dev->vq[index].err_fd = -1;
+ }
+
+ if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+ dev->vq[index].err_fd = vmsg->fds[0];
+ }
+
+ return false;
+}
+
+static bool
+vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
+
+ if (dev->iface->get_protocol_features) {
+ features |= dev->iface->get_protocol_features(dev);
+ }
+
+ vmsg->payload.u64 = features;
+ vmsg->size = sizeof(vmsg->payload.u64);
+
+ return true;
+}
+
+static bool
+vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ uint64_t features = vmsg->payload.u64;
+
+ DPRINT("u64: 0x%016"PRIx64"\n", features);
+
+ dev->protocol_features = vmsg->payload.u64;
+
+ if (dev->iface->set_protocol_features) {
+ dev->iface->set_protocol_features(dev, features);
+ }
+
+ return false;
+}
+
+static bool
+vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("Function %s() not implemented yet.\n", __func__);
+ return false;
+}
+
+static bool
+vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ unsigned int index = vmsg->payload.state.index;
+ unsigned int enable = vmsg->payload.state.num;
+
+ DPRINT("State.index: %d\n", index);
+ DPRINT("State.enable: %d\n", enable);
+
+ if (index >= VHOST_MAX_NR_VIRTQUEUE) {
+ vu_panic(dev, "Invalid vring_enable index: %u", index);
+ return false;
+ }
+
+ dev->vq[index].enable = enable;
+ return false;
+}
+
+static bool
+vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int do_reply = 0;
+
+ /* Print out generic part of the request. */
+ DPRINT("================ Vhost user message ================\n");
+ DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request),
+ vmsg->request);
+ DPRINT("Flags: 0x%x\n", vmsg->flags);
+ DPRINT("Size: %d\n", vmsg->size);
+
+ if (vmsg->fd_num) {
+ int i;
+ DPRINT("Fds:");
+ for (i = 0; i < vmsg->fd_num; i++) {
+ DPRINT(" %d", vmsg->fds[i]);
+ }
+ DPRINT("\n");
+ }
+
+ if (dev->iface->process_msg &&
+ dev->iface->process_msg(dev, vmsg, &do_reply)) {
+ return do_reply;
+ }
+
+ switch (vmsg->request) {
+ case VHOST_USER_GET_FEATURES:
+ return vu_get_features_exec(dev, vmsg);
+ case VHOST_USER_SET_FEATURES:
+ return vu_set_features_exec(dev, vmsg);
+ case VHOST_USER_GET_PROTOCOL_FEATURES:
+ return vu_get_protocol_features_exec(dev, vmsg);
+ case VHOST_USER_SET_PROTOCOL_FEATURES:
+ return vu_set_protocol_features_exec(dev, vmsg);
+ case VHOST_USER_SET_OWNER:
+ return vu_set_owner_exec(dev, vmsg);
+ case VHOST_USER_RESET_OWNER:
+ return vu_reset_device_exec(dev, vmsg);
+ case VHOST_USER_SET_MEM_TABLE:
+ return vu_set_mem_table_exec(dev, vmsg);
+ case VHOST_USER_SET_LOG_BASE:
+ return vu_set_log_base_exec(dev, vmsg);
+ case VHOST_USER_SET_LOG_FD:
+ return vu_set_log_fd_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_NUM:
+ return vu_set_vring_num_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_ADDR:
+ return vu_set_vring_addr_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_BASE:
+ return vu_set_vring_base_exec(dev, vmsg);
+ case VHOST_USER_GET_VRING_BASE:
+ return vu_get_vring_base_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_KICK:
+ return vu_set_vring_kick_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_CALL:
+ return vu_set_vring_call_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_ERR:
+ return vu_set_vring_err_exec(dev, vmsg);
+ case VHOST_USER_GET_QUEUE_NUM:
+ return vu_get_queue_num_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_ENABLE:
+ return vu_set_vring_enable_exec(dev, vmsg);
+ default:
+ vmsg_close_fds(vmsg);
+ vu_panic(dev, "Unhandled request: %d", vmsg->request);
+ }
+
+ return false;
+}
+
+bool
+vu_dispatch(VuDev *dev)
+{
+ VhostUserMsg vmsg = { 0, };
+ int reply_requested;
+ bool success = false;
+
+ if (!vu_message_read(dev, dev->sock, &vmsg)) {
+ goto end;
+ }
+
+ reply_requested = vu_process_message(dev, &vmsg);
+ if (!reply_requested) {
+ success = true;
+ goto end;
+ }
+
+ if (!vu_message_write(dev, dev->sock, &vmsg)) {
+ goto end;
+ }
+
+ success = true;
+
+end:
+ g_free(vmsg.data);
+ return success;
+}
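+
+/*
+ * A minimal sketch of driving vu_dispatch() by hand (illustrative only;
+ * applications typically call it from the watch callback they register
+ * for dev->sock). Each call reads and handles exactly one message:
+ *
+ *   while (vu_dispatch(&dev)) {
+ *       continue;
+ *   }
+ *   vu_deinit(&dev);
+ */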
+
+void
+vu_deinit(VuDev *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->nregions; i++) {
+ VuDevRegion *r = &dev->regions[i];
+ void *m = (void *) (uintptr_t) r->mmap_addr;
+ if (m != MAP_FAILED) {
+ munmap(m, r->size + r->mmap_offset);
+ }
+ }
+ dev->nregions = 0;
+
+ for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+ VuVirtq *vq = &dev->vq[i];
+
+ if (vq->call_fd != -1) {
+ close(vq->call_fd);
+ vq->call_fd = -1;
+ }
+
+ if (vq->kick_fd != -1) {
+ close(vq->kick_fd);
+ vq->kick_fd = -1;
+ }
+
+ if (vq->err_fd != -1) {
+ close(vq->err_fd);
+ vq->err_fd = -1;
+ }
+ }
+
+ vu_close_log(dev);
+
+ if (dev->sock != -1) {
+ close(dev->sock);
+ }
+}
+
+void
+vu_init(VuDev *dev,
+ int socket,
+ vu_panic_cb panic,
+ vu_set_watch_cb set_watch,
+ vu_remove_watch_cb remove_watch,
+ const VuDevIface *iface)
+{
+ int i;
+
+ assert(socket >= 0);
+ assert(set_watch);
+ assert(remove_watch);
+ assert(iface);
+ assert(panic);
+
+ memset(dev, 0, sizeof(*dev));
+
+ dev->sock = socket;
+ dev->panic = panic;
+ dev->set_watch = set_watch;
+ dev->remove_watch = remove_watch;
+ dev->iface = iface;
+ dev->log_call_fd = -1;
+ for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+ dev->vq[i] = (VuVirtq) {
+ .call_fd = -1, .kick_fd = -1, .err_fd = -1,
+ .notification = true,
+ };
+ }
+}
+
+VuVirtq *
+vu_get_queue(VuDev *dev, int qidx)
+{
+ assert(qidx < VHOST_MAX_NR_VIRTQUEUE);
+ return &dev->vq[qidx];
+}
+
+bool
+vu_queue_enabled(VuDev *dev, VuVirtq *vq)
+{
+ return vq->enable;
+}
+
+static inline uint16_t
+vring_avail_flags(VuVirtq *vq)
+{
+ return vq->vring.avail->flags;
+}
+
+static inline uint16_t
+vring_avail_idx(VuVirtq *vq)
+{
+ vq->shadow_avail_idx = vq->vring.avail->idx;
+
+ return vq->shadow_avail_idx;
+}
+
+static inline uint16_t
+vring_avail_ring(VuVirtq *vq, int i)
+{
+ return vq->vring.avail->ring[i];
+}
+
+static inline uint16_t
+vring_get_used_event(VuVirtq *vq)
+{
+ return vring_avail_ring(vq, vq->vring.num);
+}
+
+static int
+virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
+{
+ uint16_t num_heads = vring_avail_idx(vq) - idx;
+
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ if (num_heads > vq->vring.num) {
+ vu_panic(dev, "Guest moved used index from %u to %u",
+ idx, vq->shadow_avail_idx);
+ return -1;
+ }
+ if (num_heads) {
+ /* On success, callers read a descriptor at vq->last_avail_idx.
+ * Make sure descriptor read does not bypass avail index read. */
+ smp_rmb();
+ }
+
+ return num_heads;
+}
+
+static bool
+virtqueue_get_head(VuDev *dev, VuVirtq *vq,
+ unsigned int idx, unsigned int *head)
+{
+ /* Grab the next descriptor number they're advertising, and increment
+ * the index we've seen. */
+ *head = vring_avail_ring(vq, idx % vq->vring.num);
+
+ /* If their number is silly, that's a fatal mistake. */
+ if (*head >= vq->vring.num) {
+ vu_panic(dev, "Guest says index %u is available", head);
+ return false;
+ }
+
+ return true;
+}
+
+enum {
+ VIRTQUEUE_READ_DESC_ERROR = -1,
+ VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
+ VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
+};
+
+static int
+virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
+ int i, unsigned int max, unsigned int *next)
+{
+ /* If this descriptor says it doesn't chain, we're done. */
+ if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
+ return VIRTQUEUE_READ_DESC_DONE;
+ }
+
+ /* Check they're not leading us off the end of the descriptors. */
+ *next = desc[i].next;
+ /* Make sure compiler knows to grab that: we don't want it changing! */
+ smp_wmb();
+
+ if (*next >= max) {
+ vu_panic(dev, "Desc next is %u", next);
+ return VIRTQUEUE_READ_DESC_ERROR;
+ }
+
+ return VIRTQUEUE_READ_DESC_MORE;
+}
+
+void
+vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
+ unsigned int *out_bytes,
+ unsigned max_in_bytes, unsigned max_out_bytes)
+{
+ unsigned int idx;
+ unsigned int total_bufs, in_total, out_total;
+ int rc;
+
+ idx = vq->last_avail_idx;
+
+ total_bufs = in_total = out_total = 0;
+ while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
+ unsigned int max, num_bufs, indirect = 0;
+ struct vring_desc *desc;
+ unsigned int i;
+
+ max = vq->vring.num;
+ num_bufs = total_bufs;
+ if (!virtqueue_get_head(dev, vq, idx++, &i)) {
+ goto err;
+ }
+ desc = vq->vring.desc;
+
+ if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+ if (desc[i].len % sizeof(struct vring_desc)) {
+ vu_panic(dev, "Invalid size for indirect buffer table");
+ goto err;
+ }
+
+ /* If we've got too many, that implies a descriptor loop. */
+ if (num_bufs >= max) {
+ vu_panic(dev, "Looped descriptor");
+ goto err;
+ }
+
+ /* loop over the indirect descriptor table */
+ indirect = 1;
+ max = desc[i].len / sizeof(struct vring_desc);
+ desc = vu_gpa_to_va(dev, desc[i].addr);
+ num_bufs = i = 0;
+ }
+
+ do {
+ /* If we've got too many, that implies a descriptor loop. */
+ if (++num_bufs > max) {
+ vu_panic(dev, "Looped descriptor");
+ goto err;
+ }
+
+ if (desc[i].flags & VRING_DESC_F_WRITE) {
+ in_total += desc[i].len;
+ } else {
+ out_total += desc[i].len;
+ }
+ if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+ goto done;
+ }
+ rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
+ } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+ if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+ goto err;
+ }
+
+ if (!indirect) {
+ total_bufs = num_bufs;
+ } else {
+ total_bufs++;
+ }
+ }
+ if (rc < 0) {
+ goto err;
+ }
+done:
+ if (in_bytes) {
+ *in_bytes = in_total;
+ }
+ if (out_bytes) {
+ *out_bytes = out_total;
+ }
+ return;
+
+err:
+ in_total = out_total = 0;
+ goto done;
+}
+
+bool
+vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
+ unsigned int out_bytes)
+{
+ unsigned int in_total, out_total;
+
+ vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total,
+ in_bytes, out_bytes);
+
+ return in_bytes <= in_total && out_bytes <= out_total;
+}
+
+/* Fetch avail_idx from VQ memory only when we really need to know if
+ * the guest has added some buffers. */
+int
+vu_queue_empty(VuDev *dev, VuVirtq *vq)
+{
+ if (vq->shadow_avail_idx != vq->last_avail_idx) {
+ return 0;
+ }
+
+ return vring_avail_idx(vq) == vq->last_avail_idx;
+}
+
+static inline
+bool has_feature(uint64_t features, unsigned int fbit)
+{
+ assert(fbit < 64);
+ return !!(features & (1ULL << fbit));
+}
+
+static inline
+bool vu_has_feature(VuDev *dev,
+ unsigned int fbit)
+{
+ return has_feature(dev->features, fbit);
+}
+
+static bool
+vring_notify(VuDev *dev, VuVirtq *vq)
+{
+ uint16_t old, new;
+ bool v;
+
+ /* We need to expose used array entries before checking used event. */
+ smp_mb();
+
+ /* Always notify when the queue is empty (if the feature is acknowledged) */
+ if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+ !vq->inuse && vu_queue_empty(dev, vq)) {
+ return true;
+ }
+
+ if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
+ }
+
+ v = vq->signalled_used_valid;
+ vq->signalled_used_valid = true;
+ old = vq->signalled_used;
+ new = vq->signalled_used = vq->used_idx;
+ return !v || vring_need_event(vring_get_used_event(vq), new, old);
+}
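+
+/*
+ * A worked example of the event-idx branch above (illustrative): with
+ * used_event == 5, old == 5 and new == 7, vring_need_event() computes
+ * (uint16_t)(7 - 5 - 1) == 1 < (uint16_t)(7 - 5) == 2, so we notify.
+ * On the next flush, with old == 7 and new == 8, it computes 2 < 1,
+ * which is false, so the eventfd write is skipped until the driver
+ * moves used_event forward again.
+ */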
+
+void
+vu_queue_notify(VuDev *dev, VuVirtq *vq)
+{
+ if (unlikely(dev->broken)) {
+ return;
+ }
+
+ if (!vring_notify(dev, vq)) {
+ DPRINT("skipped notify...\n");
+ return;
+ }
+
+ if (eventfd_write(vq->call_fd, 1) < 0) {
+ vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
+ }
+}
+
+static inline void
+vring_used_flags_set_bit(VuVirtq *vq, int mask)
+{
+ uint16_t *flags;
+
+ flags = (uint16_t *)((char *)vq->vring.used +
+ offsetof(struct vring_used, flags));
+ *flags |= mask;
+}
+
+static inline void
+vring_used_flags_unset_bit(VuVirtq *vq, int mask)
+{
+ uint16_t *flags;
+
+ flags = (uint16_t *)((char *)vq->vring.used +
+ offsetof(struct vring_used, flags));
+ *flags &= ~mask;
+}
+
+static inline void
+vring_set_avail_event(VuVirtq *vq, uint16_t val)
+{
+ if (!vq->notification) {
+ return;
+ }
+
+ *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val;
+}
+
+void
+vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
+{
+ vq->notification = enable;
+ if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ vring_set_avail_event(vq, vring_avail_idx(vq));
+ } else if (enable) {
+ vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
+ } else {
+ vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
+ }
+ if (enable) {
+ /* Expose avail event/used flags before caller checks the avail idx. */
+ smp_mb();
+ }
+}
+
+static void
+virtqueue_map_desc(VuDev *dev,
+ unsigned int *p_num_sg, struct iovec *iov,
+ unsigned int max_num_sg, bool is_write,
+ uint64_t pa, size_t sz)
+{
+ unsigned num_sg = *p_num_sg;
+
+ assert(num_sg <= max_num_sg);
+
+ if (!sz) {
+ vu_panic(dev, "virtio: zero sized buffers are not allowed");
+ return;
+ }
+
+ iov[num_sg].iov_base = vu_gpa_to_va(dev, pa);
+ iov[num_sg].iov_len = sz;
+ num_sg++;
+
+ *p_num_sg = num_sg;
+}
+
+/* Round number down to multiple */
+#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
+
+/* Round number up to multiple */
+#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
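+
+/* For example, ALIGN_DOWN(13, 8) == 8 and ALIGN_UP(13, 8) == 16, while
+ * values already on a boundary are unchanged: ALIGN_UP(16, 8) == 16. */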
+
+static void *
+virtqueue_alloc_element(size_t sz,
+ unsigned out_num, unsigned in_num)
+{
+ VuVirtqElement *elem;
+ size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
+ size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
+ size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
+
+ assert(sz >= sizeof(VuVirtqElement));
+ elem = malloc(out_sg_end);
+ elem->out_num = out_num;
+ elem->in_num = in_num;
+ elem->in_sg = (void *)elem + in_sg_ofs;
+ elem->out_sg = (void *)elem + out_sg_ofs;
+ return elem;
+}
+
+void *
+vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
+{
+ unsigned int i, head, max;
+ VuVirtqElement *elem;
+ unsigned out_num, in_num;
+ struct iovec iov[VIRTQUEUE_MAX_SIZE];
+ struct vring_desc *desc;
+ int rc;
+
+ if (unlikely(dev->broken)) {
+ return NULL;
+ }
+
+ if (vu_queue_empty(dev, vq)) {
+ return NULL;
+ }
+ /* Needed after vu_queue_empty(), see comment in
+ * virtqueue_num_heads(). */
+ smp_rmb();
+
+ /* When we start there are no input or output buffers. */
+ out_num = in_num = 0;
+
+ max = vq->vring.num;
+ if (vq->inuse >= vq->vring.num) {
+ vu_panic(dev, "Virtqueue size exceeded");
+ return NULL;
+ }
+
+ if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
+ return NULL;
+ }
+
+ if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ vring_set_avail_event(vq, vq->last_avail_idx);
+ }
+
+ i = head;
+ desc = vq->vring.desc;
+ if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+ if (desc[i].len % sizeof(struct vring_desc)) {
+ vu_panic(dev, "Invalid size for indirect buffer table");
+ }
+
+ /* loop over the indirect descriptor table */
+ max = desc[i].len / sizeof(struct vring_desc);
+ desc = vu_gpa_to_va(dev, desc[i].addr);
+ i = 0;
+ }
+
+ /* Collect all the descriptors */
+ do {
+ if (desc[i].flags & VRING_DESC_F_WRITE) {
+ virtqueue_map_desc(dev, &in_num, iov + out_num,
+ VIRTQUEUE_MAX_SIZE - out_num, true,
+ desc[i].addr, desc[i].len);
+ } else {
+ if (in_num) {
+ vu_panic(dev, "Incorrect order for descriptors");
+ return NULL;
+ }
+ virtqueue_map_desc(dev, &out_num, iov,
+ VIRTQUEUE_MAX_SIZE, false,
+ desc[i].addr, desc[i].len);
+ }
+
+ /* If we've got too many, that implies a descriptor loop. */
+ if ((in_num + out_num) > max) {
+ vu_panic(dev, "Looped descriptor");
+ }
+ rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
+ } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+ if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+ return NULL;
+ }
+
+ /* Now copy what we have collected and mapped */
+ elem = virtqueue_alloc_element(sz, out_num, in_num);
+ elem->index = head;
+ for (i = 0; i < out_num; i++) {
+ elem->out_sg[i] = iov[i];
+ }
+ for (i = 0; i < in_num; i++) {
+ elem->in_sg[i] = iov[out_num + i];
+ }
+
+ vq->inuse++;
+
+ return elem;
+}
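+
+/*
+ * A typical element-processing loop (a sketch; process_element() is an
+ * illustrative placeholder for whatever the device does with the
+ * buffers, returning the number of bytes written to the in_sg vector):
+ *
+ *   VuVirtqElement *elem;
+ *
+ *   while ((elem = vu_queue_pop(dev, vq, sizeof(*elem)))) {
+ *       unsigned int len = process_element(elem->out_sg, elem->out_num,
+ *                                          elem->in_sg, elem->in_num);
+ *       vu_queue_push(dev, vq, elem, len);
+ *       free(elem);
+ *   }
+ *   vu_queue_notify(dev, vq);
+ */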
+
+bool
+vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
+{
+ if (num > vq->inuse) {
+ return false;
+ }
+ vq->last_avail_idx -= num;
+ vq->inuse -= num;
+ return true;
+}
+
+static inline
+void vring_used_write(VuDev *dev, VuVirtq *vq,
+ struct vring_used_elem *uelem, int i)
+{
+ struct vring_used *used = vq->vring.used;
+
+ used->ring[i] = *uelem;
+ vu_log_write(dev, vq->vring.log_guest_addr +
+ offsetof(struct vring_used, ring[i]),
+ sizeof(used->ring[i]));
+}
+
+static void
+vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
+ const VuVirtqElement *elem,
+ unsigned int len)
+{
+ struct vring_desc *desc = vq->vring.desc;
+ unsigned int i, max, min;
+ unsigned num_bufs = 0;
+
+ max = vq->vring.num;
+ i = elem->index;
+
+ if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+ if (desc[i].len % sizeof(struct vring_desc)) {
+ vu_panic(dev, "Invalid size for indirect buffer table");
+ }
+
+ /* loop over the indirect descriptor table */
+ max = desc[i].len / sizeof(struct vring_desc);
+ desc = vu_gpa_to_va(dev, desc[i].addr);
+ i = 0;
+ }
+
+ do {
+ if (++num_bufs > max) {
+ vu_panic(dev, "Looped descriptor");
+ return;
+ }
+
+ if (desc[i].flags & VRING_DESC_F_WRITE) {
+ min = MIN(desc[i].len, len);
+ vu_log_write(dev, desc[i].addr, min);
+ len -= min;
+ }
+
+ } while (len > 0 &&
+ (virtqueue_read_next_desc(dev, desc, i, max, &i)
+ == VIRTQUEUE_READ_DESC_MORE));
+}
+
+void
+vu_queue_fill(VuDev *dev, VuVirtq *vq,
+ const VuVirtqElement *elem,
+ unsigned int len, unsigned int idx)
+{
+ struct vring_used_elem uelem;
+
+ if (unlikely(dev->broken)) {
+ return;
+ }
+
+ vu_log_queue_fill(dev, vq, elem, len);
+
+ idx = (idx + vq->used_idx) % vq->vring.num;
+
+ uelem.id = elem->index;
+ uelem.len = len;
+ vring_used_write(dev, vq, &uelem, idx);
+}
+
+static inline
+void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
+{
+ vq->vring.used->idx = val;
+ vu_log_write(dev,
+ vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
+ sizeof(vq->vring.used->idx));
+
+ vq->used_idx = val;
+}
+
+void
+vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
+{
+ uint16_t old, new;
+
+ if (unlikely(dev->broken)) {
+ return;
+ }
+
+ /* Make sure buffer is written before we update index. */
+ smp_wmb();
+
+ old = vq->used_idx;
+ new = old + count;
+ vring_used_idx_set(dev, vq, new);
+ vq->inuse -= count;
+ if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
+ vq->signalled_used_valid = false;
+ }
+}
+
+void
+vu_queue_push(VuDev *dev, VuVirtq *vq,
+ const VuVirtqElement *elem, unsigned int len)
+{
+ vu_queue_fill(dev, vq, elem, len, 0);
+ vu_queue_flush(dev, vq, 1);
+}
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
new file mode 100644
index 0000000000..156b50e989
--- /dev/null
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -0,0 +1,435 @@
+/*
+ * Vhost User library
+ *
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Victor Kaplansky <victork@redhat.com>
+ * Marc-André Lureau <mlureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef LIBVHOST_USER_H
+#define LIBVHOST_USER_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/vhost.h>
+#include "standard-headers/linux/virtio_ring.h"
+
+/* Based on qemu/hw/virtio/vhost-user.c */
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#define VHOST_LOG_PAGE 4096
+
+#define VHOST_MAX_NR_VIRTQUEUE 8
+#define VIRTQUEUE_MAX_SIZE 1024
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+enum VhostUserProtocolFeature {
+ VHOST_USER_PROTOCOL_F_MQ = 0,
+ VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
+ VHOST_USER_PROTOCOL_F_RARP = 2,
+
+ VHOST_USER_PROTOCOL_F_MAX
+};
+
+#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
+
+typedef enum VhostUserRequest {
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+ VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
+ VHOST_USER_SET_VRING_ENABLE = 18,
+ VHOST_USER_SEND_RARP = 19,
+ VHOST_USER_INPUT_GET_CONFIG = 20,
+ VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+ uint64_t guest_phys_addr;
+ uint64_t memory_size;
+ uint64_t userspace_addr;
+ uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+ uint32_t nregions;
+ uint32_t padding;
+ VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+ uint64_t mmap_size;
+ uint64_t mmap_offset;
+} VhostUserLog;
+
+#if defined(_WIN32)
+# define VU_PACKED __attribute__((gcc_struct, packed))
+#else
+# define VU_PACKED __attribute__((packed))
+#endif
+
+typedef struct VhostUserMsg {
+ VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK (0x3)
+#define VHOST_USER_REPLY_MASK (0x1 << 2)
+ uint32_t flags;
+ uint32_t size; /* the following payload size */
+
+ union {
+#define VHOST_USER_VRING_IDX_MASK (0xff)
+#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ VhostUserMemory memory;
+ VhostUserLog log;
+ } payload;
+
+ int fds[VHOST_MEMORY_MAX_NREGIONS];
+ int fd_num;
+ uint8_t *data;
+} VU_PACKED VhostUserMsg;
+
+typedef struct VuDevRegion {
+ /* Guest Physical address. */
+ uint64_t gpa;
+ /* Memory region size. */
+ uint64_t size;
+ /* QEMU virtual address (userspace). */
+ uint64_t qva;
+ /* Starting offset in our mmapped space. */
+ uint64_t mmap_offset;
+ /* Start address of mmapped space. */
+ uint64_t mmap_addr;
+} VuDevRegion;
+
+typedef struct VuDev VuDev;
+
+typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
+typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
+typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
+ int *do_reply);
+typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
+
+typedef struct VuDevIface {
+ /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
+ vu_get_features_cb get_features;
+ /* enable vhost implementation features */
+ vu_set_features_cb set_features;
+ /* get the protocol feature bitmask from the underlying vhost
+ * implementation */
+ vu_get_features_cb get_protocol_features;
+ /* enable protocol features in the underlying vhost implementation. */
+ vu_set_features_cb set_protocol_features;
+ /* process_msg is called for each vhost-user message received;
+ * skip libvhost-user processing if the return value is != 0 */
+ vu_process_msg_cb process_msg;
+ /* tells when queues can be processed */
+ vu_queue_set_started_cb queue_set_started;
+} VuDevIface;
+
+typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
+
+typedef struct VuRing {
+ unsigned int num;
+ struct vring_desc *desc;
+ struct vring_avail *avail;
+ struct vring_used *used;
+ uint64_t log_guest_addr;
+ uint32_t flags;
+} VuRing;
+
+typedef struct VuVirtq {
+ VuRing vring;
+
+ /* Next head to pop */
+ uint16_t last_avail_idx;
+
+ /* Last avail_idx read from VQ. */
+ uint16_t shadow_avail_idx;
+
+ uint16_t used_idx;
+
+ /* Last used index value we have signalled on */
+ uint16_t signalled_used;
+
+ /* Whether signalled_used is valid */
+ bool signalled_used_valid;
+
+ /* Notification enabled? */
+ bool notification;
+
+ int inuse;
+
+ vu_queue_handler_cb handler;
+
+ int call_fd;
+ int kick_fd;
+ int err_fd;
+ unsigned int enable;
+ bool started;
+} VuVirtq;
+
+enum VuWatchCondition {
+ VU_WATCH_IN = 1 << 0,
+ VU_WATCH_OUT = 1 << 1,
+ VU_WATCH_PRI = 1 << 2,
+ VU_WATCH_ERR = 1 << 3,
+ VU_WATCH_HUP = 1 << 4,
+};
+
+typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
+typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
+typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
+ vu_watch_cb cb, void *data);
+typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
+
+struct VuDev {
+ int sock;
+ uint32_t nregions;
+ VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+ VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
+ int log_call_fd;
+ uint64_t log_size;
+ uint8_t *log_table;
+ uint64_t features;
+ uint64_t protocol_features;
+ bool broken;
+
+ /* @set_watch: add or update the given fd to the watch set,
+ * call cb when condition is met */
+ vu_set_watch_cb set_watch;
+
+ /* @remove_watch: remove the given fd from the watch set */
+ vu_remove_watch_cb remove_watch;
+
+ /* @panic: encountered an unrecoverable error, you may try to
+ * re-initialize */
+ vu_panic_cb panic;
+ const VuDevIface *iface;
+};
+
+typedef struct VuVirtqElement {
+ unsigned int index;
+ unsigned int out_num;
+ unsigned int in_num;
+ struct iovec *in_sg;
+ struct iovec *out_sg;
+} VuVirtqElement;
+
+/**
+ * vu_init:
+ * @dev: a VuDev context
+ * @socket: the socket connected to vhost-user master
+ * @panic: a panic callback
+ * @set_watch: a set_watch callback
+ * @remove_watch: a remove_watch callback
+ * @iface: a VuDevIface structure with vhost-user device callbacks
+ *
+ * Initializes a VuDev vhost-user context.
+ **/
+void vu_init(VuDev *dev,
+ int socket,
+ vu_panic_cb panic,
+ vu_set_watch_cb set_watch,
+ vu_remove_watch_cb remove_watch,
+ const VuDevIface *iface);
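+
+/*
+ * A minimal bring-up sketch (the my_* names and socket_fd are
+ * illustrative placeholders for application-provided code):
+ *
+ *   static uint64_t my_get_features(VuDev *dev) { return 0; }
+ *   static void my_set_features(VuDev *dev, uint64_t features) { }
+ *
+ *   static const VuDevIface my_iface = {
+ *       .get_features = my_get_features,
+ *       .set_features = my_set_features,
+ *   };
+ *
+ *   vu_init(&dev, socket_fd, my_panic, my_set_watch,
+ *           my_remove_watch, &my_iface);
+ */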
+
+/**
+ * vu_deinit:
+ * @dev: a VuDev context
+ *
+ * Cleans up the VuDev context
+ */
+void vu_deinit(VuDev *dev);
+
+/**
+ * vu_dispatch:
+ * @dev: a VuDev context
+ *
+ * Process one vhost-user message.
+ *
+ * Returns: true on success, false on failure.
+ */
+bool vu_dispatch(VuDev *dev);
+
+/**
+ * vu_gpa_to_va:
+ * @dev: a VuDev context
+ * @guest_addr: guest address
+ *
+ * Translate a guest address to a pointer. Returns NULL on failure.
+ */
+void *vu_gpa_to_va(VuDev *dev, uint64_t guest_addr);
+
+/**
+ * vu_get_queue:
+ * @dev: a VuDev context
+ * @qidx: queue index
+ *
+ * Returns: the queue at index @qidx.
+ */
+VuVirtq *vu_get_queue(VuDev *dev, int qidx);
+
+/**
+ * vu_set_queue_handler:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @handler: the queue handler callback
+ *
+ * Set the queue handler. This function may be called several times
+ * for the same queue. If called with NULL @handler, the handler is
+ * removed.
+ */
+void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
+ vu_queue_handler_cb handler);
+
+/**
+ * vu_queue_set_notification:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @enable: state
+ *
+ * Enable or disable driver notifications for the queue, either via the
+ * used ring flags or the avail event index, depending on whether
+ * VIRTIO_RING_F_EVENT_IDX was negotiated.
+ */
+void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
+
+/**
+ * vu_queue_enabled:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Returns: whether the queue is enabled.
+ */
+bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_empty:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Returns: whether the queue is empty.
+ */
+int vu_queue_empty(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_notify:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Request to notify the queue via callfd (skipped if unnecessary)
+ */
+void vu_queue_notify(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_pop:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @sz: the size of struct to return (must be >= sizeof(VuVirtqElement))
+ *
+ * Returns: a VuVirtqElement filled from the queue or NULL.
+ */
+void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
+
+/**
+ * vu_queue_rewind:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @num: number of elements to push back
+ *
+ * Pretend that elements weren't popped from the virtqueue. The next
+ * vu_queue_pop() will refetch the oldest element.
+ *
+ * Returns: true on success, false if @num is greater than the number of in use
+ * elements.
+ */
+bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
+
+/**
+ * vu_queue_fill:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @elem: a VuVirtqElement
+ * @len: length in bytes to write
+ * @idx: optional offset for the used ring index (0 in general)
+ *
+ * Fill the used ring with the @elem element.
+ */
+void vu_queue_fill(VuDev *dev, VuVirtq *vq,
+ const VuVirtqElement *elem,
+ unsigned int len, unsigned int idx);
+
+/**
+ * vu_queue_push:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @elem: a VuVirtqElement
+ * @len: length in bytes to write
+ *
+ * Helper that combines vu_queue_fill() with vu_queue_flush().
+ */
+void vu_queue_push(VuDev *dev, VuVirtq *vq,
+ const VuVirtqElement *elem, unsigned int len);
+
+/**
+ * vu_queue_flush:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @num: number of elements to flush
+ *
+ * Mark the last @num elements as done (used.idx is advanced by @num).
+ */
+void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
+
+/**
+ * vu_queue_get_avail_bytes:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @in_bytes: set to the number of bytes available in device-writable buffers
+ * @out_bytes: set to the number of bytes available in device-readable buffers
+ * @max_in_bytes: stop counting after max_in_bytes
+ * @max_out_bytes: stop counting after max_out_bytes
+ *
+ * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
+ */
+void vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
+ unsigned int *out_bytes,
+ unsigned max_in_bytes, unsigned max_out_bytes);
+
+/**
+ * vu_queue_avail_bytes:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @in_bytes: expected in bytes
+ * @out_bytes: expected out bytes
+ *
+ * Returns: true if in_bytes <= in_total && out_bytes <= out_total
+ */
+bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
+ unsigned int out_bytes);
+
+#endif /* LIBVHOST_USER_H */
diff --git a/docs/pcie.txt b/docs/pcie.txt
index 9fb20aaed9..5bada24a15 100644
--- a/docs/pcie.txt
+++ b/docs/pcie.txt
@@ -110,18 +110,18 @@ Plug only PCI Express devices into PCI Express Ports.
-device ioh3420,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z] \
-device <dev>,bus=root_port1
2.2.2 Using multi-function PCI Express Root Ports:
- -device ioh3420,id=root_port1,multifunction=on,chassis=x,slot=y[,bus=pcie.0][,addr=z.0] \
- -device ioh3420,id=root_port2,chassis=x1,slot=y1[,bus=pcie.0][,addr=z.1] \
- -device ioh3420,id=root_port3,chassis=x2,slot=y2[,bus=pcie.0][,addr=z.2] \
-2.2.2 Plugging a PCI Express device into a Switch:
+ -device ioh3420,id=root_port1,multifunction=on,chassis=x,addr=z.0[,slot=y][,bus=pcie.0] \
+ -device ioh3420,id=root_port2,chassis=x1,addr=z.1[,slot=y1][,bus=pcie.0] \
+ -device ioh3420,id=root_port3,chassis=x2,addr=z.2[,slot=y2][,bus=pcie.0] \
+2.2.3 Plugging a PCI Express device into a Switch:
-device ioh3420,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z] \
-device x3130-upstream,id=upstream_port1,bus=root_port1[,addr=x] \
-device xio3130-downstream,id=downstream_port1,bus=upstream_port1,chassis=x1,slot=y1[,addr=z1]] \
-device <dev>,bus=downstream_port1
Notes:
- - (slot, chassis) pair is mandatory and must be
- unique for each PCI Express Root Port.
+ - (slot, chassis) pair is mandatory and must be unique for each
+ PCI Express Root Port. slot defaults to 0 when not specified.
- 'addr' parameter can be 0 for all the examples above.
diff --git a/docs/replay.txt b/docs/replay.txt
index 779c6c059e..347b2ff055 100644
--- a/docs/replay.txt
+++ b/docs/replay.txt
@@ -195,3 +195,17 @@ Queue is flushed at checkpoints and information about processed requests
is recorded to the log. In replay phase the queue is matched with
events read from the log. Therefore block devices requests are processed
deterministically.
+
+Network devices
+---------------
+
+Record and replay of network interactions is performed with a network filter.
+Each backend must have its own instance of the replay filter, as follows:
+ -netdev user,id=net1 -device rtl8139,netdev=net1
+ -object filter-replay,id=replay,netdev=net1
+
+The replay network filter is used to record and replay network packets. While
+recording the virtual machine, this filter saves all packets coming from
+the outside world into the log. In replay mode, packets from the log are
+injected into the network device, and all interactions with the real network
+backend are disabled.
diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
index d70bd83b13..036890feb0 100644
--- a/docs/specs/vhost-user.txt
+++ b/docs/specs/vhost-user.txt
@@ -259,6 +259,7 @@ Protocol features
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
#define VHOST_USER_PROTOCOL_F_RARP 2
#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
+#define VHOST_USER_PROTOCOL_F_NET_MTU 4
Message types
-------------
@@ -470,6 +471,21 @@ Message types
The first 6 bytes of the payload contain the mac address of the guest to
allow the vhost user backend to construct and broadcast the fake RARP.
+ * VHOST_USER_NET_SET_MTU
+
+ Id: 20
+ Equivalent ioctl: N/A
+ Master payload: u64
+
+ Set host MTU value exposed to the guest.
+ This request should be sent only when the VIRTIO_NET_F_MTU feature has
+ been successfully negotiated, VHOST_USER_F_PROTOCOL_FEATURES is present
+ in VHOST_USER_GET_FEATURES, and protocol feature bit
+ VHOST_USER_PROTOCOL_F_NET_MTU is present in
+ VHOST_USER_GET_PROTOCOL_FEATURES.
+ If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, the slave must respond
+ with zero if the specified MTU is valid, or non-zero otherwise.
+
VHOST_USER_PROTOCOL_F_REPLY_ACK:
-------------------------------
The original vhost-user specification only demands replies for certain
diff --git a/exec.c b/exec.c
index 8d4bb0e8c1..47835c1dc1 100644
--- a/exec.c
+++ b/exec.c
@@ -449,6 +449,39 @@ address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *x
}
/* Called from RCU critical section */
+IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
+ bool is_write)
+{
+ IOMMUTLBEntry iotlb = {0};
+ MemoryRegionSection *section;
+ MemoryRegion *mr;
+
+ for (;;) {
+ AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
+ section = address_space_lookup_region(d, addr, false);
+ addr = addr - section->offset_within_address_space
+ + section->offset_within_region;
+ mr = section->mr;
+
+ if (!mr->iommu_ops) {
+ break;
+ }
+
+ iotlb = mr->iommu_ops->translate(mr, addr, is_write);
+ if (!(iotlb.perm & (1 << is_write))) {
+ iotlb.target_as = NULL;
+ break;
+ }
+
+ addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
+ | (addr & iotlb.addr_mask));
+ as = iotlb.target_as;
+ }
+
+ return iotlb;
+}
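+
+/*
+ * Illustrative caller (a sketch; handle_mapping() is a placeholder, and
+ * like the function itself this must run inside an RCU critical
+ * section). target_as is only non-NULL when an IOMMU produced a usable
+ * translation:
+ *
+ *   rcu_read_lock();
+ *   IOMMUTLBEntry entry = address_space_get_iotlb_entry(as, addr, true);
+ *   if (entry.target_as) {
+ *       handle_mapping(entry.translated_addr, entry.addr_mask);
+ *   }
+ *   rcu_read_unlock();
+ */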
+
+/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
hwaddr *xlat, hwaddr *plen,
bool is_write)
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 489e63bb75..834c63b980 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -1,7 +1,7 @@
common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o pcihp.o
common-obj-$(CONFIG_ACPI_X86_ICH) += ich9.o tco.o
common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o
-common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o memory_hotplug_acpi_table.o
+common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
common-obj-$(CONFIG_ACPI) += acpi_interface.o
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 830c475127..5c279bbaca 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -306,7 +306,8 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
if (pm->acpi_memory_hotplug.is_enabled) {
acpi_memory_hotplug_init(pci_address_space_io(lpc_pci), OBJECT(lpc_pci),
- &pm->acpi_memory_hotplug);
+ &pm->acpi_memory_hotplug,
+ ACPI_MEMORY_HOTPLUG_BASE);
}
}
diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c
index ec4e64b361..210073d283 100644
--- a/hw/acpi/memory_hotplug.c
+++ b/hw/acpi/memory_hotplug.c
@@ -7,6 +7,34 @@
#include "trace.h"
#include "qapi-event.h"
+#define MEMORY_SLOTS_NUMBER "MDNR"
+#define MEMORY_HOTPLUG_IO_REGION "HPMR"
+#define MEMORY_SLOT_ADDR_LOW "MRBL"
+#define MEMORY_SLOT_ADDR_HIGH "MRBH"
+#define MEMORY_SLOT_SIZE_LOW "MRLL"
+#define MEMORY_SLOT_SIZE_HIGH "MRLH"
+#define MEMORY_SLOT_PROXIMITY "MPX"
+#define MEMORY_SLOT_ENABLED "MES"
+#define MEMORY_SLOT_INSERT_EVENT "MINS"
+#define MEMORY_SLOT_REMOVE_EVENT "MRMV"
+#define MEMORY_SLOT_EJECT "MEJ"
+#define MEMORY_SLOT_SELECTOR "MSEL"
+#define MEMORY_SLOT_OST_EVENT "MOEV"
+#define MEMORY_SLOT_OST_STATUS "MOSC"
+#define MEMORY_SLOT_LOCK "MLCK"
+#define MEMORY_SLOT_STATUS_METHOD "MRST"
+#define MEMORY_SLOT_CRS_METHOD "MCRS"
+#define MEMORY_SLOT_OST_METHOD "MOST"
+#define MEMORY_SLOT_PROXIMITY_METHOD "MPXM"
+#define MEMORY_SLOT_EJECT_METHOD "MEJ0"
+#define MEMORY_SLOT_NOTIFY_METHOD "MTFY"
+#define MEMORY_SLOT_SCAN_METHOD "MSCN"
+#define MEMORY_HOTPLUG_DEVICE "MHPD"
+#define MEMORY_HOTPLUG_IO_LEN 24
+#define MEMORY_DEVICES_CONTAINER "\\_SB.MHPC"
+
+static uint16_t memhp_io_base;
+
static ACPIOSTInfo *acpi_memory_device_status(int slot, MemStatus *mdev)
{
ACPIOSTInfo *info = g_new0(ACPIOSTInfo, 1);
@@ -178,7 +206,7 @@ static const MemoryRegionOps acpi_memory_hotplug_ops = {
};
void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner,
- MemHotplugState *state)
+ MemHotplugState *state, uint16_t io_base)
{
MachineState *machine = MACHINE(qdev_get_machine());
@@ -187,10 +215,12 @@ void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner,
return;
}
+ assert(!memhp_io_base);
+ memhp_io_base = io_base;
state->devs = g_malloc0(sizeof(*state->devs) * state->dev_count);
memory_region_init_io(&state->io, owner, &acpi_memory_hotplug_ops, state,
- "acpi-mem-hotplug", ACPI_MEMORY_HOTPLUG_IO_LEN);
- memory_region_add_subregion(as, ACPI_MEMORY_HOTPLUG_BASE, &state->io);
+ "acpi-mem-hotplug", MEMORY_HOTPLUG_IO_LEN);
+ memory_region_add_subregion(as, memhp_io_base, &state->io);
}
/**
@@ -306,3 +336,387 @@ const VMStateDescription vmstate_memory_hotplug = {
VMSTATE_END_OF_LIST()
}
};
+
+void build_memory_hotplug_aml(Aml *table, uint32_t nr_mem,
+ const char *res_root,
+ const char *event_handler_method)
+{
+ int i;
+ Aml *ifctx;
+ Aml *method;
+ Aml *dev_container;
+ Aml *mem_ctrl_dev;
+ char *mhp_res_path;
+
+ if (!memhp_io_base) {
+ return;
+ }
+
+ mhp_res_path = g_strdup_printf("%s." MEMORY_HOTPLUG_DEVICE, res_root);
+ mem_ctrl_dev = aml_device("%s", mhp_res_path);
+ {
+ Aml *crs;
+
+ aml_append(mem_ctrl_dev, aml_name_decl("_HID", aml_string("PNP0A06")));
+ aml_append(mem_ctrl_dev,
+ aml_name_decl("_UID", aml_string("Memory hotplug resources")));
+
+ crs = aml_resource_template();
+ aml_append(crs,
+ aml_io(AML_DECODE16, memhp_io_base, memhp_io_base, 0,
+ MEMORY_HOTPLUG_IO_LEN)
+ );
+ aml_append(mem_ctrl_dev, aml_name_decl("_CRS", crs));
+
+ aml_append(mem_ctrl_dev, aml_operation_region(
+ MEMORY_HOTPLUG_IO_REGION, AML_SYSTEM_IO,
+ aml_int(memhp_io_base), MEMORY_HOTPLUG_IO_LEN)
+ );
+
+ }
+ aml_append(table, mem_ctrl_dev);
+
+ dev_container = aml_device(MEMORY_DEVICES_CONTAINER);
+ {
+ Aml *field;
+ Aml *one = aml_int(1);
+ Aml *zero = aml_int(0);
+ Aml *ret_val = aml_local(0);
+ Aml *slot_arg0 = aml_arg(0);
+ Aml *slots_nr = aml_name(MEMORY_SLOTS_NUMBER);
+ Aml *ctrl_lock = aml_name(MEMORY_SLOT_LOCK);
+ Aml *slot_selector = aml_name(MEMORY_SLOT_SELECTOR);
+ char *mmio_path = g_strdup_printf("%s." MEMORY_HOTPLUG_IO_REGION,
+ mhp_res_path);
+
+ aml_append(dev_container, aml_name_decl("_HID", aml_string("PNP0A06")));
+ aml_append(dev_container,
+ aml_name_decl("_UID", aml_string("DIMM devices")));
+
+ assert(nr_mem <= ACPI_MAX_RAM_SLOTS);
+ aml_append(dev_container,
+ aml_name_decl(MEMORY_SLOTS_NUMBER, aml_int(nr_mem))
+ );
+
+ field = aml_field(mmio_path, AML_DWORD_ACC,
+ AML_NOLOCK, AML_PRESERVE);
+ aml_append(field, /* read only */
+ aml_named_field(MEMORY_SLOT_ADDR_LOW, 32));
+ aml_append(field, /* read only */
+ aml_named_field(MEMORY_SLOT_ADDR_HIGH, 32));
+ aml_append(field, /* read only */
+ aml_named_field(MEMORY_SLOT_SIZE_LOW, 32));
+ aml_append(field, /* read only */
+ aml_named_field(MEMORY_SLOT_SIZE_HIGH, 32));
+ aml_append(field, /* read only */
+ aml_named_field(MEMORY_SLOT_PROXIMITY, 32));
+ aml_append(dev_container, field);
+
+ field = aml_field(mmio_path, AML_BYTE_ACC,
+ AML_NOLOCK, AML_WRITE_AS_ZEROS);
+ aml_append(field, aml_reserved_field(160 /* bits, Offset(20) */));
+ aml_append(field, /* 1 if enabled, read only */
+ aml_named_field(MEMORY_SLOT_ENABLED, 1));
+ aml_append(field,
+ /* (read) 1 if the slot has an insert event. (write) 1 to clear it */
+ aml_named_field(MEMORY_SLOT_INSERT_EVENT, 1));
+ aml_append(field,
+ /* (read) 1 if the slot has a remove event. (write) 1 to clear it */
+ aml_named_field(MEMORY_SLOT_REMOVE_EVENT, 1));
+ aml_append(field,
+ /* initiates device eject, write only */
+ aml_named_field(MEMORY_SLOT_EJECT, 1));
+ aml_append(dev_container, field);
+
+ field = aml_field(mmio_path, AML_DWORD_ACC,
+ AML_NOLOCK, AML_PRESERVE);
+ aml_append(field, /* DIMM selector, write only */
+ aml_named_field(MEMORY_SLOT_SELECTOR, 32));
+ aml_append(field, /* _OST event code, write only */
+ aml_named_field(MEMORY_SLOT_OST_EVENT, 32));
+ aml_append(field, /* _OST status code, write only */
+ aml_named_field(MEMORY_SLOT_OST_STATUS, 32));
+ aml_append(dev_container, field);
+ g_free(mmio_path);
+
+ method = aml_method("_STA", 0, AML_NOTSERIALIZED);
+ ifctx = aml_if(aml_equal(slots_nr, zero));
+ {
+ aml_append(ifctx, aml_return(zero));
+ }
+ aml_append(method, ifctx);
+ /* present, functioning, decoding, not shown in UI */
+ aml_append(method, aml_return(aml_int(0xB)));
+ aml_append(dev_container, method);
+
+ aml_append(dev_container, aml_mutex(MEMORY_SLOT_LOCK, 0));
+
+ method = aml_method(MEMORY_SLOT_SCAN_METHOD, 0, AML_NOTSERIALIZED);
+ {
+ Aml *else_ctx;
+ Aml *while_ctx;
+ Aml *idx = aml_local(0);
+ Aml *eject_req = aml_int(3);
+ Aml *dev_chk = aml_int(1);
+
+ ifctx = aml_if(aml_equal(slots_nr, zero));
+ {
+ aml_append(ifctx, aml_return(zero));
+ }
+ aml_append(method, ifctx);
+
+ aml_append(method, aml_store(zero, idx));
+ aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
+ /* build AML that:
+ * loops over all slots and Notifies DIMMs with
+ * Device Check or Eject Request notifications if
+ * slot has corresponding status bit set and clears
+ * slot status.
+ */
+ while_ctx = aml_while(aml_lless(idx, slots_nr));
+ {
+ Aml *ins_evt = aml_name(MEMORY_SLOT_INSERT_EVENT);
+ Aml *rm_evt = aml_name(MEMORY_SLOT_REMOVE_EVENT);
+
+ aml_append(while_ctx, aml_store(idx, slot_selector));
+ ifctx = aml_if(aml_equal(ins_evt, one));
+ {
+ aml_append(ifctx,
+ aml_call2(MEMORY_SLOT_NOTIFY_METHOD,
+ idx, dev_chk));
+ aml_append(ifctx, aml_store(one, ins_evt));
+ }
+ aml_append(while_ctx, ifctx);
+
+ else_ctx = aml_else();
+ ifctx = aml_if(aml_equal(rm_evt, one));
+ {
+ aml_append(ifctx,
+ aml_call2(MEMORY_SLOT_NOTIFY_METHOD,
+ idx, eject_req));
+ aml_append(ifctx, aml_store(one, rm_evt));
+ }
+ aml_append(else_ctx, ifctx);
+ aml_append(while_ctx, else_ctx);
+
+ aml_append(while_ctx, aml_add(idx, one, idx));
+ }
+ aml_append(method, while_ctx);
+ aml_append(method, aml_release(ctrl_lock));
+ aml_append(method, aml_return(one));
+ }
+ aml_append(dev_container, method);
+
+ method = aml_method(MEMORY_SLOT_STATUS_METHOD, 1, AML_NOTSERIALIZED);
+ {
+ Aml *slot_enabled = aml_name(MEMORY_SLOT_ENABLED);
+
+ aml_append(method, aml_store(zero, ret_val));
+ aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
+ aml_append(method,
+ aml_store(aml_to_integer(slot_arg0), slot_selector));
+
+ ifctx = aml_if(aml_equal(slot_enabled, one));
+ {
+ aml_append(ifctx, aml_store(aml_int(0xF), ret_val));
+ }
+ aml_append(method, ifctx);
+
+ aml_append(method, aml_release(ctrl_lock));
+ aml_append(method, aml_return(ret_val));
+ }
+ aml_append(dev_container, method);
+
+ method = aml_method(MEMORY_SLOT_CRS_METHOD, 1, AML_SERIALIZED);
+ {
+ Aml *mr64 = aml_name("MR64");
+ Aml *mr32 = aml_name("MR32");
+ Aml *crs_tmpl = aml_resource_template();
+ Aml *minl = aml_name("MINL");
+ Aml *minh = aml_name("MINH");
+ Aml *maxl = aml_name("MAXL");
+ Aml *maxh = aml_name("MAXH");
+ Aml *lenl = aml_name("LENL");
+ Aml *lenh = aml_name("LENH");
+
+ aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
+ aml_append(method, aml_store(aml_to_integer(slot_arg0),
+ slot_selector));
+
+ aml_append(crs_tmpl,
+ aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED,
+ AML_CACHEABLE, AML_READ_WRITE,
+ 0, 0x0, 0xFFFFFFFFFFFFFFFEULL, 0,
+ 0xFFFFFFFFFFFFFFFFULL));
+ aml_append(method, aml_name_decl("MR64", crs_tmpl));
+ aml_append(method,
+ aml_create_dword_field(mr64, aml_int(14), "MINL"));
+ aml_append(method,
+ aml_create_dword_field(mr64, aml_int(18), "MINH"));
+ aml_append(method,
+ aml_create_dword_field(mr64, aml_int(38), "LENL"));
+ aml_append(method,
+ aml_create_dword_field(mr64, aml_int(42), "LENH"));
+ aml_append(method,
+ aml_create_dword_field(mr64, aml_int(22), "MAXL"));
+ aml_append(method,
+ aml_create_dword_field(mr64, aml_int(26), "MAXH"));
+
+ aml_append(method,
+ aml_store(aml_name(MEMORY_SLOT_ADDR_HIGH), minh));
+ aml_append(method,
+ aml_store(aml_name(MEMORY_SLOT_ADDR_LOW), minl));
+ aml_append(method,
+ aml_store(aml_name(MEMORY_SLOT_SIZE_HIGH), lenh));
+ aml_append(method,
+ aml_store(aml_name(MEMORY_SLOT_SIZE_LOW), lenl));
+
+ /* 64-bit math: MAX = MIN + LEN - 1 */
+ aml_append(method, aml_add(minl, lenl, maxl));
+ aml_append(method, aml_add(minh, lenh, maxh));
+ ifctx = aml_if(aml_lless(maxl, minl));
+ {
+ aml_append(ifctx, aml_add(maxh, one, maxh));
+ }
+ aml_append(method, ifctx);
+ ifctx = aml_if(aml_lless(maxl, one));
+ {
+ aml_append(ifctx, aml_subtract(maxh, one, maxh));
+ }
+ aml_append(method, ifctx);
+ aml_append(method, aml_subtract(maxl, one, maxl));
+
+ /* return 32-bit _CRS if addr/size is in low mem */
+ /* TODO: remove it since all hotplugged DIMMs are in high mem */
+ ifctx = aml_if(aml_equal(maxh, zero));
+ {
+ crs_tmpl = aml_resource_template();
+ aml_append(crs_tmpl,
+ aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED,
+ AML_MAX_FIXED, AML_CACHEABLE,
+ AML_READ_WRITE,
+ 0, 0x0, 0xFFFFFFFE, 0,
+ 0xFFFFFFFF));
+ aml_append(ifctx, aml_name_decl("MR32", crs_tmpl));
+ aml_append(ifctx,
+ aml_create_dword_field(mr32, aml_int(10), "MIN"));
+ aml_append(ifctx,
+ aml_create_dword_field(mr32, aml_int(14), "MAX"));
+ aml_append(ifctx,
+ aml_create_dword_field(mr32, aml_int(22), "LEN"));
+ aml_append(ifctx, aml_store(minl, aml_name("MIN")));
+ aml_append(ifctx, aml_store(maxl, aml_name("MAX")));
+ aml_append(ifctx, aml_store(lenl, aml_name("LEN")));
+
+ aml_append(ifctx, aml_release(ctrl_lock));
+ aml_append(ifctx, aml_return(mr32));
+ }
+ aml_append(method, ifctx);
+
+ aml_append(method, aml_release(ctrl_lock));
+ aml_append(method, aml_return(mr64));
+ }
+ aml_append(dev_container, method);
+
+ method = aml_method(MEMORY_SLOT_PROXIMITY_METHOD, 1,
+ AML_NOTSERIALIZED);
+ {
+ Aml *proximity = aml_name(MEMORY_SLOT_PROXIMITY);
+
+ aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
+ aml_append(method, aml_store(aml_to_integer(slot_arg0),
+ slot_selector));
+ aml_append(method, aml_store(proximity, ret_val));
+ aml_append(method, aml_release(ctrl_lock));
+ aml_append(method, aml_return(ret_val));
+ }
+ aml_append(dev_container, method);
+
+ method = aml_method(MEMORY_SLOT_OST_METHOD, 4, AML_NOTSERIALIZED);
+ {
+ Aml *ost_evt = aml_name(MEMORY_SLOT_OST_EVENT);
+ Aml *ost_status = aml_name(MEMORY_SLOT_OST_STATUS);
+
+ aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
+ aml_append(method, aml_store(aml_to_integer(slot_arg0),
+ slot_selector));
+ aml_append(method, aml_store(aml_arg(1), ost_evt));
+ aml_append(method, aml_store(aml_arg(2), ost_status));
+ aml_append(method, aml_release(ctrl_lock));
+ }
+ aml_append(dev_container, method);
+
+ method = aml_method(MEMORY_SLOT_EJECT_METHOD, 2, AML_NOTSERIALIZED);
+ {
+ Aml *eject = aml_name(MEMORY_SLOT_EJECT);
+
+ aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
+ aml_append(method, aml_store(aml_to_integer(slot_arg0),
+ slot_selector));
+ aml_append(method, aml_store(one, eject));
+ aml_append(method, aml_release(ctrl_lock));
+ }
+ aml_append(dev_container, method);
+
+ /* build memory devices */
+ for (i = 0; i < nr_mem; i++) {
+ Aml *dev;
+ const char *s;
+
+ dev = aml_device("MP%02X", i);
+ aml_append(dev, aml_name_decl("_UID", aml_string("0x%02X", i)));
+ aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C80")));
+
+ method = aml_method("_CRS", 0, AML_NOTSERIALIZED);
+ s = MEMORY_SLOT_CRS_METHOD;
+ aml_append(method, aml_return(aml_call1(s, aml_name("_UID"))));
+ aml_append(dev, method);
+
+ method = aml_method("_STA", 0, AML_NOTSERIALIZED);
+ s = MEMORY_SLOT_STATUS_METHOD;
+ aml_append(method, aml_return(aml_call1(s, aml_name("_UID"))));
+ aml_append(dev, method);
+
+ method = aml_method("_PXM", 0, AML_NOTSERIALIZED);
+ s = MEMORY_SLOT_PROXIMITY_METHOD;
+ aml_append(method, aml_return(aml_call1(s, aml_name("_UID"))));
+ aml_append(dev, method);
+
+ method = aml_method("_OST", 3, AML_NOTSERIALIZED);
+ s = MEMORY_SLOT_OST_METHOD;
+ aml_append(method, aml_return(aml_call4(
+ s, aml_name("_UID"), aml_arg(0), aml_arg(1), aml_arg(2)
+ )));
+ aml_append(dev, method);
+
+ method = aml_method("_EJ0", 1, AML_NOTSERIALIZED);
+ s = MEMORY_SLOT_EJECT_METHOD;
+ aml_append(method, aml_return(aml_call2(
+ s, aml_name("_UID"), aml_arg(0))));
+ aml_append(dev, method);
+
+ aml_append(dev_container, dev);
+ }
+
+ /* build Method(MEMORY_SLOT_NOTIFY_METHOD, 2) {
+ * If (LEqual(Arg0, 0x00)) {Notify(MP00, Arg1)} ... }
+ */
+ method = aml_method(MEMORY_SLOT_NOTIFY_METHOD, 2, AML_NOTSERIALIZED);
+ for (i = 0; i < nr_mem; i++) {
+ ifctx = aml_if(aml_equal(aml_arg(0), aml_int(i)));
+ aml_append(ifctx,
+ aml_notify(aml_name("MP%.02X", i), aml_arg(1))
+ );
+ aml_append(method, ifctx);
+ }
+ aml_append(dev_container, method);
+ }
+ aml_append(table, dev_container);
+
+ method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED);
+ aml_append(method,
+ aml_call0(MEMORY_DEVICES_CONTAINER "." MEMORY_SLOT_SCAN_METHOD));
+ aml_append(table, method);
+
+ g_free(mhp_res_path);
+}
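+
+/*
+ * Illustrative call site (a sketch; the resource root and the GPE event
+ * handler method name depend on the board code building the DSDT):
+ *
+ *   build_memory_hotplug_aml(dsdt, machine->ram_slots,
+ *                            "\\_SB.PCI0", "\\_GPE._E03");
+ */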
diff --git a/hw/acpi/memory_hotplug_acpi_table.c b/hw/acpi/memory_hotplug_acpi_table.c
deleted file mode 100644
index c75660215d..0000000000
--- a/hw/acpi/memory_hotplug_acpi_table.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Memory hotplug AML code of DSDT ACPI table
- *
- * Copyright (C) 2015 Red Hat Inc
- *
- * Author: Igor Mammedov <imammedo@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "hw/acpi/memory_hotplug.h"
-#include "include/hw/acpi/pc-hotplug.h"
-#include "hw/boards.h"
-
-void build_memory_hotplug_aml(Aml *ctx, uint32_t nr_mem,
- uint16_t io_base, uint16_t io_len)
-{
- Aml *ifctx;
- Aml *method;
- Aml *pci_scope;
- Aml *mem_ctrl_dev;
-
- /* scope for memory hotplug controller device node */
- pci_scope = aml_scope("_SB.PCI0");
- mem_ctrl_dev = aml_device(MEMORY_HOTPLUG_DEVICE);
- {
- Aml *one = aml_int(1);
- Aml *zero = aml_int(0);
- Aml *ret_val = aml_local(0);
- Aml *slot_arg0 = aml_arg(0);
- Aml *slots_nr = aml_name(MEMORY_SLOTS_NUMBER);
- Aml *ctrl_lock = aml_name(MEMORY_SLOT_LOCK);
- Aml *slot_selector = aml_name(MEMORY_SLOT_SLECTOR);
-
- aml_append(mem_ctrl_dev, aml_name_decl("_HID", aml_string("PNP0A06")));
- aml_append(mem_ctrl_dev,
- aml_name_decl("_UID", aml_string("Memory hotplug resources")));
-
- method = aml_method("_STA", 0, AML_NOTSERIALIZED);
- ifctx = aml_if(aml_equal(slots_nr, zero));
- {
- aml_append(ifctx, aml_return(zero));
- }
- aml_append(method, ifctx);
- /* present, functioning, decoding, not shown in UI */
- aml_append(method, aml_return(aml_int(0xB)));
- aml_append(mem_ctrl_dev, method);
-
- aml_append(mem_ctrl_dev, aml_mutex(MEMORY_SLOT_LOCK, 0));
-
- method = aml_method(MEMORY_SLOT_SCAN_METHOD, 0, AML_NOTSERIALIZED);
- {
- Aml *else_ctx;
- Aml *while_ctx;
- Aml *idx = aml_local(0);
- Aml *eject_req = aml_int(3);
- Aml *dev_chk = aml_int(1);
-
- ifctx = aml_if(aml_equal(slots_nr, zero));
- {
- aml_append(ifctx, aml_return(zero));
- }
- aml_append(method, ifctx);
-
- aml_append(method, aml_store(zero, idx));
- aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
- /* build AML that:
- * loops over all slots and notifies DIMMs with a
- * Device Check or Eject Request notification if the
- * slot's corresponding status bit is set, then clears
- * the slot status.
- */
- while_ctx = aml_while(aml_lless(idx, slots_nr));
- {
- Aml *ins_evt = aml_name(MEMORY_SLOT_INSERT_EVENT);
- Aml *rm_evt = aml_name(MEMORY_SLOT_REMOVE_EVENT);
-
- aml_append(while_ctx, aml_store(idx, slot_selector));
- ifctx = aml_if(aml_equal(ins_evt, one));
- {
- aml_append(ifctx,
- aml_call2(MEMORY_SLOT_NOTIFY_METHOD,
- idx, dev_chk));
- aml_append(ifctx, aml_store(one, ins_evt));
- }
- aml_append(while_ctx, ifctx);
-
- else_ctx = aml_else();
- ifctx = aml_if(aml_equal(rm_evt, one));
- {
- aml_append(ifctx,
- aml_call2(MEMORY_SLOT_NOTIFY_METHOD,
- idx, eject_req));
- aml_append(ifctx, aml_store(one, rm_evt));
- }
- aml_append(else_ctx, ifctx);
- aml_append(while_ctx, else_ctx);
-
- aml_append(while_ctx, aml_add(idx, one, idx));
- }
- aml_append(method, while_ctx);
- aml_append(method, aml_release(ctrl_lock));
- aml_append(method, aml_return(one));
- }
- aml_append(mem_ctrl_dev, method);
-
- method = aml_method(MEMORY_SLOT_STATUS_METHOD, 1, AML_NOTSERIALIZED);
- {
- Aml *slot_enabled = aml_name(MEMORY_SLOT_ENABLED);
-
- aml_append(method, aml_store(zero, ret_val));
- aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
- aml_append(method,
- aml_store(aml_to_integer(slot_arg0), slot_selector));
-
- ifctx = aml_if(aml_equal(slot_enabled, one));
- {
- aml_append(ifctx, aml_store(aml_int(0xF), ret_val));
- }
- aml_append(method, ifctx);
-
- aml_append(method, aml_release(ctrl_lock));
- aml_append(method, aml_return(ret_val));
- }
- aml_append(mem_ctrl_dev, method);
-
- method = aml_method(MEMORY_SLOT_CRS_METHOD, 1, AML_SERIALIZED);
- {
- Aml *mr64 = aml_name("MR64");
- Aml *mr32 = aml_name("MR32");
- Aml *crs_tmpl = aml_resource_template();
- Aml *minl = aml_name("MINL");
- Aml *minh = aml_name("MINH");
- Aml *maxl = aml_name("MAXL");
- Aml *maxh = aml_name("MAXH");
- Aml *lenl = aml_name("LENL");
- Aml *lenh = aml_name("LENH");
-
- aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
- aml_append(method, aml_store(aml_to_integer(slot_arg0),
- slot_selector));
-
- aml_append(crs_tmpl,
- aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED,
- AML_CACHEABLE, AML_READ_WRITE,
- 0, 0x0, 0xFFFFFFFFFFFFFFFEULL, 0,
- 0xFFFFFFFFFFFFFFFFULL));
- aml_append(method, aml_name_decl("MR64", crs_tmpl));
- aml_append(method,
- aml_create_dword_field(mr64, aml_int(14), "MINL"));
- aml_append(method,
- aml_create_dword_field(mr64, aml_int(18), "MINH"));
- aml_append(method,
- aml_create_dword_field(mr64, aml_int(38), "LENL"));
- aml_append(method,
- aml_create_dword_field(mr64, aml_int(42), "LENH"));
- aml_append(method,
- aml_create_dword_field(mr64, aml_int(22), "MAXL"));
- aml_append(method,
- aml_create_dword_field(mr64, aml_int(26), "MAXH"));
-
- aml_append(method,
- aml_store(aml_name(MEMORY_SLOT_ADDR_HIGH), minh));
- aml_append(method,
- aml_store(aml_name(MEMORY_SLOT_ADDR_LOW), minl));
- aml_append(method,
- aml_store(aml_name(MEMORY_SLOT_SIZE_HIGH), lenh));
- aml_append(method,
- aml_store(aml_name(MEMORY_SLOT_SIZE_LOW), lenl));
-
- /* 64-bit math: MAX = MIN + LEN - 1 */
- aml_append(method, aml_add(minl, lenl, maxl));
- aml_append(method, aml_add(minh, lenh, maxh));
- ifctx = aml_if(aml_lless(maxl, minl));
- {
- aml_append(ifctx, aml_add(maxh, one, maxh));
- }
- aml_append(method, ifctx);
- ifctx = aml_if(aml_lless(maxl, one));
- {
- aml_append(ifctx, aml_subtract(maxh, one, maxh));
- }
- aml_append(method, ifctx);
- aml_append(method, aml_subtract(maxl, one, maxl));
-
- /* return 32-bit _CRS if addr/size is in low mem */
- /* TODO: remove it since all hotplugged DIMMs are in high mem */
- ifctx = aml_if(aml_equal(maxh, zero));
- {
- crs_tmpl = aml_resource_template();
- aml_append(crs_tmpl,
- aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED,
- AML_MAX_FIXED, AML_CACHEABLE,
- AML_READ_WRITE,
- 0, 0x0, 0xFFFFFFFE, 0,
- 0xFFFFFFFF));
- aml_append(ifctx, aml_name_decl("MR32", crs_tmpl));
- aml_append(ifctx,
- aml_create_dword_field(mr32, aml_int(10), "MIN"));
- aml_append(ifctx,
- aml_create_dword_field(mr32, aml_int(14), "MAX"));
- aml_append(ifctx,
- aml_create_dword_field(mr32, aml_int(22), "LEN"));
- aml_append(ifctx, aml_store(minl, aml_name("MIN")));
- aml_append(ifctx, aml_store(maxl, aml_name("MAX")));
- aml_append(ifctx, aml_store(lenl, aml_name("LEN")));
-
- aml_append(ifctx, aml_release(ctrl_lock));
- aml_append(ifctx, aml_return(mr32));
- }
- aml_append(method, ifctx);
-
- aml_append(method, aml_release(ctrl_lock));
- aml_append(method, aml_return(mr64));
- }
- aml_append(mem_ctrl_dev, method);
-
- method = aml_method(MEMORY_SLOT_PROXIMITY_METHOD, 1,
- AML_NOTSERIALIZED);
- {
- Aml *proximity = aml_name(MEMORY_SLOT_PROXIMITY);
-
- aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
- aml_append(method, aml_store(aml_to_integer(slot_arg0),
- slot_selector));
- aml_append(method, aml_store(proximity, ret_val));
- aml_append(method, aml_release(ctrl_lock));
- aml_append(method, aml_return(ret_val));
- }
- aml_append(mem_ctrl_dev, method);
-
- method = aml_method(MEMORY_SLOT_OST_METHOD, 4, AML_NOTSERIALIZED);
- {
- Aml *ost_evt = aml_name(MEMORY_SLOT_OST_EVENT);
- Aml *ost_status = aml_name(MEMORY_SLOT_OST_STATUS);
-
- aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
- aml_append(method, aml_store(aml_to_integer(slot_arg0),
- slot_selector));
- aml_append(method, aml_store(aml_arg(1), ost_evt));
- aml_append(method, aml_store(aml_arg(2), ost_status));
- aml_append(method, aml_release(ctrl_lock));
- }
- aml_append(mem_ctrl_dev, method);
-
- method = aml_method(MEMORY_SLOT_EJECT_METHOD, 2, AML_NOTSERIALIZED);
- {
- Aml *eject = aml_name(MEMORY_SLOT_EJECT);
-
- aml_append(method, aml_acquire(ctrl_lock, 0xFFFF));
- aml_append(method, aml_store(aml_to_integer(slot_arg0),
- slot_selector));
- aml_append(method, aml_store(one, eject));
- aml_append(method, aml_release(ctrl_lock));
- }
- aml_append(mem_ctrl_dev, method);
- }
- aml_append(pci_scope, mem_ctrl_dev);
- aml_append(ctx, pci_scope);
-}
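
The _CRS arithmetic in the deleted method above is easy to misread because AML dword fields hold only 32 bits; here is the same MAX = MIN + LEN - 1 computation, carry and borrow included, as a standalone C sketch (hypothetical helper, not part of the patch):

    #include <stdint.h>

    static uint64_t slot_end(uint32_t minl, uint32_t minh,
                             uint32_t lenl, uint32_t lenh)
    {
        uint32_t maxl = minl + lenl;    /* aml_add(minl, lenl, maxl) */
        uint32_t maxh = minh + lenh;    /* aml_add(minh, lenh, maxh) */

        if (maxl < minl) {              /* low-half overflow: carry up */
            maxh += 1;
        }
        if (maxl < 1) {                 /* maxl == 0: the -1 will borrow */
            maxh -= 1;
        }
        maxl -= 1;
        return ((uint64_t)maxh << 32) | maxl;
    }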
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 17d36bd595..6d99fe407c 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -644,7 +644,8 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
PIIX4_CPU_HOTPLUG_IO_BASE);
if (s->acpi_memory_hotplug.is_enabled) {
- acpi_memory_hotplug_init(parent, OBJECT(s), &s->acpi_memory_hotplug);
+ acpi_memory_hotplug_init(parent, OBJECT(s), &s->acpi_memory_hotplug,
+ ACPI_MEMORY_HOTPLUG_BASE);
}
}
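
The register-block base is now chosen by the caller instead of being baked into the helper. A sketch of the resulting prototype, inferred from this call site (parameter names and types are an assumption, not taken from the real header):

    void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner,
                                  MemHotplugState *state, uint16_t io_base);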
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index bdcf6bcce7..d31b4577f0 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -1258,7 +1258,7 @@ static void pxa2xx_i2c_update(PXA2xxI2CState *s)
}
/* These are only stubs now. */
-static void pxa2xx_i2c_event(I2CSlave *i2c, enum i2c_event event)
+static int pxa2xx_i2c_event(I2CSlave *i2c, enum i2c_event event)
{
PXA2xxI2CSlaveState *slave = PXA2XX_I2C_SLAVE(i2c);
PXA2xxI2CState *s = slave->host;
@@ -1280,6 +1280,8 @@ static void pxa2xx_i2c_event(I2CSlave *i2c, enum i2c_event event)
break;
}
pxa2xx_i2c_update(s);
+
+ return 0;
}
static int pxa2xx_i2c_rx(I2CSlave *i2c)
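
The I2C slave event hook now returns int in every device model touched by this patch. A minimal sketch of the updated contract (the meaning of a nonzero return - letting a device reject, i.e. NAK, the event - is an assumption based on the signature change, not spelled out in these hunks):

    static int my_i2c_event(I2CSlave *i2c, enum i2c_event event)
    {
        switch (event) {
        case I2C_START_RECV:    /* master is about to read from us */
        case I2C_START_SEND:    /* master is about to write to us */
        case I2C_FINISH:
        case I2C_NACK:
            break;
        }
        return 0;               /* 0 = event accepted */
    }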
diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c
index 39d9dbbae6..c3db996930 100644
--- a/hw/arm/tosa.c
+++ b/hw/arm/tosa.c
@@ -172,7 +172,7 @@ static int tosa_dac_send(I2CSlave *i2c, uint8_t data)
return 0;
}
-static void tosa_dac_event(I2CSlave *i2c, enum i2c_event event)
+static int tosa_dac_event(I2CSlave *i2c, enum i2c_event event)
{
TosaDACState *s = TOSA_DAC(i2c);
@@ -194,6 +194,8 @@ static void tosa_dac_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
static int tosa_dac_recv(I2CSlave *s)
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 7102686882..085a611173 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -29,7 +29,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
-#include "hw/arm/virt-acpi-build.h"
#include "qemu/bitmap.h"
#include "trace.h"
#include "qom/cpu.h"
@@ -43,6 +42,7 @@
#include "hw/acpi/aml-build.h"
#include "hw/pci/pcie_host.h"
#include "hw/pci/pci.h"
+#include "hw/arm/virt.h"
#include "sysemu/numa.h"
#include "kvm_arm.h"
@@ -384,7 +384,7 @@ build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset)
}
static void
-build_iort(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_iort(GArray *table_data, BIOSLinker *linker)
{
int iort_start = table_data->len;
AcpiIortIdMapping *idmap;
@@ -439,11 +439,11 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
}
static void
-build_spcr(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_spcr(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
AcpiSerialPortConsoleRedirection *spcr;
- const MemMapEntry *uart_memmap = &guest_info->memmap[VIRT_UART];
- int irq = guest_info->irqmap[VIRT_UART] + ARM_SPI_BASE;
+ const MemMapEntry *uart_memmap = &vms->memmap[VIRT_UART];
+ int irq = vms->irqmap[VIRT_UART] + ARM_SPI_BASE;
spcr = acpi_data_push(table_data, sizeof(*spcr));
@@ -472,16 +472,16 @@ build_spcr(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
}
static void
-build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
AcpiSystemResourceAffinityTable *srat;
AcpiSratProcessorGiccAffinity *core;
AcpiSratMemoryAffinity *numamem;
int i, j, srat_start;
uint64_t mem_base;
- uint32_t *cpu_node = g_malloc0(guest_info->smp_cpus * sizeof(uint32_t));
+ uint32_t *cpu_node = g_malloc0(vms->smp_cpus * sizeof(uint32_t));
- for (i = 0; i < guest_info->smp_cpus; i++) {
+ for (i = 0; i < vms->smp_cpus; i++) {
j = numa_get_node_for_cpu(i);
if (j < nb_numa_nodes) {
cpu_node[i] = j;
@@ -492,7 +492,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
srat = acpi_data_push(table_data, sizeof(*srat));
srat->reserved1 = cpu_to_le32(1);
- for (i = 0; i < guest_info->smp_cpus; ++i) {
+ for (i = 0; i < vms->smp_cpus; ++i) {
core = acpi_data_push(table_data, sizeof(*core));
core->type = ACPI_SRAT_PROCESSOR_GICC;
core->length = sizeof(*core);
@@ -502,7 +502,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
}
g_free(cpu_node);
- mem_base = guest_info->memmap[VIRT_MEM].base;
+ mem_base = vms->memmap[VIRT_MEM].base;
for (i = 0; i < nb_numa_nodes; ++i) {
numamem = acpi_data_push(table_data, sizeof(*numamem));
build_srat_memory(numamem, mem_base, numa_info[i].node_mem, i,
@@ -515,10 +515,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
}
static void
-build_mcfg(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_mcfg(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
AcpiTableMcfg *mcfg;
- const MemMapEntry *memmap = guest_info->memmap;
+ const MemMapEntry *memmap = vms->memmap;
int len = sizeof(*mcfg) + sizeof(mcfg->allocation[0]);
mcfg = acpi_data_push(table_data, len);
@@ -535,24 +535,33 @@ build_mcfg(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
/* GTDT */
static void
-build_gtdt(GArray *table_data, BIOSLinker *linker)
+build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
int gtdt_start = table_data->len;
AcpiGenericTimerTable *gtdt;
+ uint32_t irqflags;
+
+ if (vmc->claim_edge_triggered_timers) {
+ irqflags = ACPI_GTDT_INTERRUPT_MODE_EDGE;
+ } else {
+ irqflags = ACPI_GTDT_INTERRUPT_MODE_LEVEL;
+ }
gtdt = acpi_data_push(table_data, sizeof *gtdt);
 /* The interrupt values are the device tree values plus 16 */
- gtdt->secure_el1_interrupt = ARCH_TIMER_S_EL1_IRQ + 16;
- gtdt->secure_el1_flags = ACPI_EDGE_SENSITIVE;
+ gtdt->secure_el1_interrupt = cpu_to_le32(ARCH_TIMER_S_EL1_IRQ + 16);
+ gtdt->secure_el1_flags = cpu_to_le32(irqflags);
- gtdt->non_secure_el1_interrupt = ARCH_TIMER_NS_EL1_IRQ + 16;
- gtdt->non_secure_el1_flags = ACPI_EDGE_SENSITIVE | ACPI_GTDT_ALWAYS_ON;
+ gtdt->non_secure_el1_interrupt = cpu_to_le32(ARCH_TIMER_NS_EL1_IRQ + 16);
+ gtdt->non_secure_el1_flags = cpu_to_le32(irqflags |
+ ACPI_GTDT_CAP_ALWAYS_ON);
- gtdt->virtual_timer_interrupt = ARCH_TIMER_VIRT_IRQ + 16;
- gtdt->virtual_timer_flags = ACPI_EDGE_SENSITIVE;
+ gtdt->virtual_timer_interrupt = cpu_to_le32(ARCH_TIMER_VIRT_IRQ + 16);
+ gtdt->virtual_timer_flags = cpu_to_le32(irqflags);
- gtdt->non_secure_el2_interrupt = ARCH_TIMER_NS_EL2_IRQ + 16;
- gtdt->non_secure_el2_flags = ACPI_EDGE_SENSITIVE;
+ gtdt->non_secure_el2_interrupt = cpu_to_le32(ARCH_TIMER_NS_EL2_IRQ + 16);
+ gtdt->non_secure_el2_flags = cpu_to_le32(irqflags);
build_header(linker, table_data,
(void *)(table_data->data + gtdt_start), "GTDT",
@@ -561,11 +570,12 @@ build_gtdt(GArray *table_data, BIOSLinker *linker)
/* MADT */
static void
-build_madt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
int madt_start = table_data->len;
- const MemMapEntry *memmap = guest_info->memmap;
- const int *irqmap = guest_info->irqmap;
+ const MemMapEntry *memmap = vms->memmap;
+ const int *irqmap = vms->irqmap;
AcpiMultipleApicTable *madt;
AcpiMadtGenericDistributor *gicd;
AcpiMadtGenericMsiFrame *gic_msi;
@@ -576,30 +586,30 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
gicd = acpi_data_push(table_data, sizeof *gicd);
gicd->type = ACPI_APIC_GENERIC_DISTRIBUTOR;
gicd->length = sizeof(*gicd);
- gicd->base_address = memmap[VIRT_GIC_DIST].base;
- gicd->version = guest_info->gic_version;
+ gicd->base_address = cpu_to_le64(memmap[VIRT_GIC_DIST].base);
+ gicd->version = vms->gic_version;
- for (i = 0; i < guest_info->smp_cpus; i++) {
- AcpiMadtGenericInterrupt *gicc = acpi_data_push(table_data,
- sizeof *gicc);
+ for (i = 0; i < vms->smp_cpus; i++) {
+ AcpiMadtGenericCpuInterface *gicc = acpi_data_push(table_data,
+ sizeof(*gicc));
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i));
- gicc->type = ACPI_APIC_GENERIC_INTERRUPT;
+ gicc->type = ACPI_APIC_GENERIC_CPU_INTERFACE;
gicc->length = sizeof(*gicc);
- if (guest_info->gic_version == 2) {
- gicc->base_address = memmap[VIRT_GIC_CPU].base;
+ if (vms->gic_version == 2) {
+ gicc->base_address = cpu_to_le64(memmap[VIRT_GIC_CPU].base);
}
- gicc->cpu_interface_number = i;
- gicc->arm_mpidr = armcpu->mp_affinity;
- gicc->uid = i;
- gicc->flags = cpu_to_le32(ACPI_GICC_ENABLED);
+ gicc->cpu_interface_number = cpu_to_le32(i);
+ gicc->arm_mpidr = cpu_to_le64(armcpu->mp_affinity);
+ gicc->uid = cpu_to_le32(i);
+ gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED);
if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) {
gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ));
}
}
- if (guest_info->gic_version == 3) {
+ if (vms->gic_version == 3) {
AcpiMadtGenericTranslator *gic_its;
AcpiMadtGenericRedistributor *gicr = acpi_data_push(table_data,
sizeof *gicr);
@@ -609,7 +619,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST].base);
gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST].size);
- if (its_class_name() && !guest_info->no_its) {
+ if (its_class_name() && !vmc->no_its) {
gic_its = acpi_data_push(table_data, sizeof *gic_its);
gic_its->type = ACPI_APIC_GENERIC_TRANSLATOR;
gic_its->length = sizeof(*gic_its);
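
Most of the MADT fixes above are byte-order conversions: ACPI tables are defined as little-endian, so multi-byte fields must pass through cpu_to_le32()/cpu_to_le64() before being stored, or the tables come out wrong on big-endian hosts. A self-contained sketch of the semantics (QEMU's real helpers live in its byte-order headers):

    #include <stdint.h>

    /* No-op on little-endian hosts, byte swap on big-endian ones. */
    static inline uint32_t to_le32(uint32_t v)
    {
    #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        return __builtin_bswap32(v);
    #else
        return v;
    #endif
    }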
@@ -641,8 +651,8 @@ build_fadt(GArray *table_data, BIOSLinker *linker, unsigned dsdt_tbl_offset)
 /* Hardware Reduced = 1, use PSCI 0.2+ with HVC as the conduit */
fadt->flags = cpu_to_le32(1 << ACPI_FADT_F_HW_REDUCED_ACPI);
- fadt->arm_boot_flags = cpu_to_le16((1 << ACPI_FADT_ARM_USE_PSCI_G_0_2) |
- (1 << ACPI_FADT_ARM_PSCI_USE_HVC));
+ fadt->arm_boot_flags = cpu_to_le16(ACPI_FADT_ARM_PSCI_COMPLIANT |
+ ACPI_FADT_ARM_PSCI_USE_HVC);
 /* ACPI v5.1 (fadt->revision, fadt->minor_revision) */
fadt->minor_revision = 0x1;
@@ -658,11 +668,11 @@ build_fadt(GArray *table_data, BIOSLinker *linker, unsigned dsdt_tbl_offset)
/* DSDT */
static void
-build_dsdt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
Aml *scope, *dsdt;
- const MemMapEntry *memmap = guest_info->memmap;
- const int *irqmap = guest_info->irqmap;
+ const MemMapEntry *memmap = vms->memmap;
+ const int *irqmap = vms->irqmap;
dsdt = init_aml_allocator();
/* Reserve space for header */
@@ -674,7 +684,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
* the RTC ACPI device at all when using UEFI.
*/
scope = aml_scope("\\_SB");
- acpi_dsdt_add_cpus(scope, guest_info->smp_cpus);
+ acpi_dsdt_add_cpus(scope, vms->smp_cpus);
acpi_dsdt_add_uart(scope, &memmap[VIRT_UART],
(irqmap[VIRT_UART] + ARM_SPI_BASE));
acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]);
@@ -682,7 +692,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO],
(irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS);
acpi_dsdt_add_pci(scope, memmap, (irqmap[VIRT_PCIE] + ARM_SPI_BASE),
- guest_info->use_highmem);
+ vms->highmem);
acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO],
(irqmap[VIRT_GPIO] + ARM_SPI_BASE));
acpi_dsdt_add_power_button(scope);
@@ -705,12 +715,12 @@ struct AcpiBuildState {
MemoryRegion *linker_mr;
/* Is table patched? */
bool patched;
- VirtGuestInfo *guest_info;
} AcpiBuildState;
static
-void virt_acpi_build(VirtGuestInfo *guest_info, AcpiBuildTables *tables)
+void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
{
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
GArray *table_offsets;
unsigned dsdt, rsdt;
GArray *tables_blob = tables->table_data;
@@ -724,32 +734,32 @@ void virt_acpi_build(VirtGuestInfo *guest_info, AcpiBuildTables *tables)
/* DSDT is pointed to by FADT */
dsdt = tables_blob->len;
- build_dsdt(tables_blob, tables->linker, guest_info);
+ build_dsdt(tables_blob, tables->linker, vms);
/* FADT MADT GTDT MCFG SPCR pointed to by RSDT */
acpi_add_table(table_offsets, tables_blob);
build_fadt(tables_blob, tables->linker, dsdt);
acpi_add_table(table_offsets, tables_blob);
- build_madt(tables_blob, tables->linker, guest_info);
+ build_madt(tables_blob, tables->linker, vms);
acpi_add_table(table_offsets, tables_blob);
- build_gtdt(tables_blob, tables->linker);
+ build_gtdt(tables_blob, tables->linker, vms);
acpi_add_table(table_offsets, tables_blob);
- build_mcfg(tables_blob, tables->linker, guest_info);
+ build_mcfg(tables_blob, tables->linker, vms);
acpi_add_table(table_offsets, tables_blob);
- build_spcr(tables_blob, tables->linker, guest_info);
+ build_spcr(tables_blob, tables->linker, vms);
if (nb_numa_nodes > 0) {
acpi_add_table(table_offsets, tables_blob);
- build_srat(tables_blob, tables->linker, guest_info);
+ build_srat(tables_blob, tables->linker, vms);
}
- if (its_class_name() && !guest_info->no_its) {
+ if (its_class_name() && !vmc->no_its) {
acpi_add_table(table_offsets, tables_blob);
- build_iort(tables_blob, tables->linker, guest_info);
+ build_iort(tables_blob, tables->linker);
}
/* RSDT is pointed to by RSDP */
@@ -788,13 +798,12 @@ static void virt_acpi_build_update(void *build_opaque)
acpi_build_tables_init(&tables);
- virt_acpi_build(build_state->guest_info, &tables);
+ virt_acpi_build(VIRT_MACHINE(qdev_get_machine()), &tables);
acpi_ram_update(build_state->table_mr, tables.table_data);
acpi_ram_update(build_state->rsdp_mr, tables.rsdp);
acpi_ram_update(build_state->linker_mr, tables.linker->cmd_blob);
-
acpi_build_tables_cleanup(&tables, true);
}
@@ -822,12 +831,12 @@ static const VMStateDescription vmstate_virt_acpi_build = {
},
};
-void virt_acpi_setup(VirtGuestInfo *guest_info)
+void virt_acpi_setup(VirtMachineState *vms)
{
AcpiBuildTables tables;
AcpiBuildState *build_state;
- if (!guest_info->fw_cfg) {
+ if (!vms->fw_cfg) {
trace_virt_acpi_setup();
return;
}
@@ -838,10 +847,9 @@ void virt_acpi_setup(VirtGuestInfo *guest_info)
}
build_state = g_malloc0(sizeof *build_state);
- build_state->guest_info = guest_info;
acpi_build_tables_init(&tables);
- virt_acpi_build(build_state->guest_info, &tables);
+ virt_acpi_build(vms, &tables);
/* Now expose it all to Guest */
build_state->table_mr = acpi_add_rom_blob(build_state, tables.table_data,
@@ -853,8 +861,8 @@ void virt_acpi_setup(VirtGuestInfo *guest_info)
acpi_add_rom_blob(build_state, tables.linker->cmd_blob,
"etc/table-loader", 0);
- fw_cfg_add_file(guest_info->fw_cfg, ACPI_BUILD_TPMLOG_FILE,
- tables.tcpalog->data, acpi_data_len(tables.tcpalog));
+ fw_cfg_add_file(vms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data,
+ acpi_data_len(tables.tcpalog));
build_state->rsdp_mr = acpi_add_rom_blob(build_state, tables.rsdp,
ACPI_BUILD_RSDP_FILE, 0);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 11c53a56e0..7a03f84051 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -41,14 +41,12 @@
#include "sysemu/numa.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
-#include "hw/boards.h"
#include "hw/compat.h"
#include "hw/loader.h"
#include "exec/address-spaces.h"
#include "qemu/bitops.h"
#include "qemu/error-report.h"
#include "hw/pci-host/gpex.h"
-#include "hw/arm/virt-acpi-build.h"
#include "hw/arm/sysbus-fdt.h"
#include "hw/platform-bus.h"
#include "hw/arm/fdt.h"
@@ -59,51 +57,6 @@
#include "qapi/visitor.h"
#include "standard-headers/linux/input.h"
-/* Number of external interrupt lines to configure the GIC with */
-#define NUM_IRQS 256
-
-#define PLATFORM_BUS_NUM_IRQS 64
-
-static ARMPlatformBusSystemParams platform_bus_params;
-
-typedef struct VirtBoardInfo {
- struct arm_boot_info bootinfo;
- const char *cpu_model;
- const MemMapEntry *memmap;
- const int *irqmap;
- int smp_cpus;
- void *fdt;
- int fdt_size;
- uint32_t clock_phandle;
- uint32_t gic_phandle;
- uint32_t msi_phandle;
- bool using_psci;
-} VirtBoardInfo;
-
-typedef struct {
- MachineClass parent;
- VirtBoardInfo *daughterboard;
- bool disallow_affinity_adjustment;
- bool no_its;
- bool no_pmu;
-} VirtMachineClass;
-
-typedef struct {
- MachineState parent;
- bool secure;
- bool highmem;
- int32_t gic_version;
-} VirtMachineState;
-
-#define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
-#define VIRT_MACHINE(obj) \
- OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE)
-#define VIRT_MACHINE_GET_CLASS(obj) \
- OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_VIRT_MACHINE)
-#define VIRT_MACHINE_CLASS(klass) \
- OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE)
-
-
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
void *data) \
@@ -133,6 +86,13 @@ typedef struct {
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
+/* Number of external interrupt lines to configure the GIC with */
+#define NUM_IRQS 256
+
+#define PLATFORM_BUS_NUM_IRQS 64
+
+static ARMPlatformBusSystemParams platform_bus_params;
+
/* RAM limit in GB. Since VIRT_MEM starts at the 1GB mark, this means
* RAM can go up to the 256GB mark, leaving 256GB of the physical
* address space unallocated and free for future use between 256G and 512G.
@@ -202,51 +162,36 @@ static const int a15irqmap[] = {
[VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
};
-static VirtBoardInfo machines[] = {
- {
- .cpu_model = "cortex-a15",
- .memmap = a15memmap,
- .irqmap = a15irqmap,
- },
- {
- .cpu_model = "cortex-a53",
- .memmap = a15memmap,
- .irqmap = a15irqmap,
- },
- {
- .cpu_model = "cortex-a57",
- .memmap = a15memmap,
- .irqmap = a15irqmap,
- },
- {
- .cpu_model = "host",
- .memmap = a15memmap,
- .irqmap = a15irqmap,
- },
+static const char *valid_cpus[] = {
+ "cortex-a15",
+ "cortex-a53",
+ "cortex-a57",
+ "host",
};
-static VirtBoardInfo *find_machine_info(const char *cpu)
+static bool cpuname_valid(const char *cpu)
{
int i;
- for (i = 0; i < ARRAY_SIZE(machines); i++) {
- if (strcmp(cpu, machines[i].cpu_model) == 0) {
- return &machines[i];
+ for (i = 0; i < ARRAY_SIZE(valid_cpus); i++) {
+ if (strcmp(cpu, valid_cpus[i]) == 0) {
+ return true;
}
}
- return NULL;
+ return false;
}
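
An illustrative caller for the new check (names and error wording assumed; the actual call site is outside these hunks). Note that valid_cpus needs no NULL sentinel, since the scan is bounded by ARRAY_SIZE():

    if (!cpuname_valid(cpu_model)) {
        error_report("mach-virt: CPU %s not supported", cpu_model);
        exit(1);
    }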
-static void create_fdt(VirtBoardInfo *vbi)
+static void create_fdt(VirtMachineState *vms)
{
- void *fdt = create_device_tree(&vbi->fdt_size);
+ void *fdt = create_device_tree(&vms->fdt_size);
if (!fdt) {
error_report("create_device_tree() failed");
exit(1);
}
- vbi->fdt = fdt;
+ vms->fdt = fdt;
/* Header */
qemu_fdt_setprop_string(fdt, "/", "compatible", "linux,dummy-virt");
@@ -266,27 +211,27 @@ static void create_fdt(VirtBoardInfo *vbi)
* optional but in practice if you omit them the kernel refuses to
* probe for the device.
*/
- vbi->clock_phandle = qemu_fdt_alloc_phandle(fdt);
+ vms->clock_phandle = qemu_fdt_alloc_phandle(fdt);
qemu_fdt_add_subnode(fdt, "/apb-pclk");
qemu_fdt_setprop_string(fdt, "/apb-pclk", "compatible", "fixed-clock");
qemu_fdt_setprop_cell(fdt, "/apb-pclk", "#clock-cells", 0x0);
qemu_fdt_setprop_cell(fdt, "/apb-pclk", "clock-frequency", 24000000);
qemu_fdt_setprop_string(fdt, "/apb-pclk", "clock-output-names",
"clk24mhz");
- qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", vbi->clock_phandle);
+ qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", vms->clock_phandle);
}
-static void fdt_add_psci_node(const VirtBoardInfo *vbi)
+static void fdt_add_psci_node(const VirtMachineState *vms)
{
uint32_t cpu_suspend_fn;
uint32_t cpu_off_fn;
uint32_t cpu_on_fn;
uint32_t migrate_fn;
- void *fdt = vbi->fdt;
+ void *fdt = vms->fdt;
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0));
- if (!vbi->using_psci) {
+ if (!vms->using_psci) {
return;
}
@@ -327,41 +272,60 @@ static void fdt_add_psci_node(const VirtBoardInfo *vbi)
qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
}
-static void fdt_add_timer_nodes(const VirtBoardInfo *vbi, int gictype)
+static void fdt_add_timer_nodes(const VirtMachineState *vms)
{
- /* Note that on A15 h/w these interrupts are level-triggered,
- * but for the GIC implementation provided by both QEMU and KVM
- * they are edge-triggered.
+ /* On real hardware these interrupts are level-triggered.
+ * On KVM they were edge-triggered before host kernel version 4.4,
+ * and level-triggered afterwards.
+ * On emulated QEMU they are level-triggered.
+ *
+ * Getting the DTB info about them wrong is awkward for some
+ * guest kernels:
+ * pre-4.8 ignore the DT and leave the interrupt configured
+ * with whatever the GIC reset value (or the bootloader) left it at
+ * 4.8 before rc6 honour the incorrect data by programming it back
+ * into the GIC, causing problems
+ * 4.8rc6 and later ignore the DT and always write "level triggered"
+ * into the GIC
+ *
+ * For backwards-compatibility, virt-2.8 and earlier will continue
+ * to say these are edge-triggered, but later machines will report
+ * the correct information.
*/
ARMCPU *armcpu;
- uint32_t irqflags = GIC_FDT_IRQ_FLAGS_EDGE_LO_HI;
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
+ uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
+
+ if (vmc->claim_edge_triggered_timers) {
+ irqflags = GIC_FDT_IRQ_FLAGS_EDGE_LO_HI;
+ }
- if (gictype == 2) {
+ if (vms->gic_version == 2) {
irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
GIC_FDT_IRQ_PPI_CPU_WIDTH,
- (1 << vbi->smp_cpus) - 1);
+ (1 << vms->smp_cpus) - 1);
}
- qemu_fdt_add_subnode(vbi->fdt, "/timer");
+ qemu_fdt_add_subnode(vms->fdt, "/timer");
armcpu = ARM_CPU(qemu_get_cpu(0));
if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
const char compat[] = "arm,armv8-timer\0arm,armv7-timer";
- qemu_fdt_setprop(vbi->fdt, "/timer", "compatible",
+ qemu_fdt_setprop(vms->fdt, "/timer", "compatible",
compat, sizeof(compat));
} else {
- qemu_fdt_setprop_string(vbi->fdt, "/timer", "compatible",
+ qemu_fdt_setprop_string(vms->fdt, "/timer", "compatible",
"arm,armv7-timer");
}
- qemu_fdt_setprop(vbi->fdt, "/timer", "always-on", NULL, 0);
- qemu_fdt_setprop_cells(vbi->fdt, "/timer", "interrupts",
+ qemu_fdt_setprop(vms->fdt, "/timer", "always-on", NULL, 0);
+ qemu_fdt_setprop_cells(vms->fdt, "/timer", "interrupts",
GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_S_EL1_IRQ, irqflags,
GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL1_IRQ, irqflags,
GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_VIRT_IRQ, irqflags,
GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL2_IRQ, irqflags);
}
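
The GICv2 branch above packs a target-CPU mask into the upper bits of the FDT flags cell via deposit32(). A sketch of that helper's semantics (QEMU's version lives in qemu/bitops.h):

    /* Insert the low "length" bits of fieldval into value at bit "start". */
    static inline uint32_t deposit32_sketch(uint32_t value, int start,
                                            int length, uint32_t fieldval)
    {
        uint32_t mask = (~0U >> (32 - length)) << start;

        return (value & ~mask) | ((fieldval << start) & mask);
    }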
-static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
+static void fdt_add_cpu_nodes(const VirtMachineState *vms)
{
int cpu;
int addr_cells = 1;
@@ -380,7 +344,7 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
* The simplest way to go is to examine affinity IDs of all our CPUs. If
* at least one of them has Aff3 populated, we set #address-cells to 2.
*/
- for (cpu = 0; cpu < vbi->smp_cpus; cpu++) {
+ for (cpu = 0; cpu < vms->smp_cpus; cpu++) {
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
if (armcpu->mp_affinity & ARM_AFF3_MASK) {
@@ -389,101 +353,101 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
}
}
- qemu_fdt_add_subnode(vbi->fdt, "/cpus");
- qemu_fdt_setprop_cell(vbi->fdt, "/cpus", "#address-cells", addr_cells);
- qemu_fdt_setprop_cell(vbi->fdt, "/cpus", "#size-cells", 0x0);
+ qemu_fdt_add_subnode(vms->fdt, "/cpus");
+ qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#address-cells", addr_cells);
+ qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#size-cells", 0x0);
- for (cpu = vbi->smp_cpus - 1; cpu >= 0; cpu--) {
+ for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) {
char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "cpu");
- qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "cpu");
+ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible",
armcpu->dtb_compatible);
- if (vbi->using_psci && vbi->smp_cpus > 1) {
- qemu_fdt_setprop_string(vbi->fdt, nodename,
+ if (vms->using_psci && vms->smp_cpus > 1) {
+ qemu_fdt_setprop_string(vms->fdt, nodename,
"enable-method", "psci");
}
if (addr_cells == 2) {
- qemu_fdt_setprop_u64(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_u64(vms->fdt, nodename, "reg",
armcpu->mp_affinity);
} else {
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "reg",
armcpu->mp_affinity);
}
i = numa_get_node_for_cpu(cpu);
if (i < nb_numa_nodes) {
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "numa-node-id", i);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", i);
}
g_free(nodename);
}
}
-static void fdt_add_its_gic_node(VirtBoardInfo *vbi)
+static void fdt_add_its_gic_node(VirtMachineState *vms)
{
- vbi->msi_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
- qemu_fdt_add_subnode(vbi->fdt, "/intc/its");
- qemu_fdt_setprop_string(vbi->fdt, "/intc/its", "compatible",
+ vms->msi_phandle = qemu_fdt_alloc_phandle(vms->fdt);
+ qemu_fdt_add_subnode(vms->fdt, "/intc/its");
+ qemu_fdt_setprop_string(vms->fdt, "/intc/its", "compatible",
"arm,gic-v3-its");
- qemu_fdt_setprop(vbi->fdt, "/intc/its", "msi-controller", NULL, 0);
- qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/its", "reg",
- 2, vbi->memmap[VIRT_GIC_ITS].base,
- 2, vbi->memmap[VIRT_GIC_ITS].size);
- qemu_fdt_setprop_cell(vbi->fdt, "/intc/its", "phandle", vbi->msi_phandle);
+ qemu_fdt_setprop(vms->fdt, "/intc/its", "msi-controller", NULL, 0);
+ qemu_fdt_setprop_sized_cells(vms->fdt, "/intc/its", "reg",
+ 2, vms->memmap[VIRT_GIC_ITS].base,
+ 2, vms->memmap[VIRT_GIC_ITS].size);
+ qemu_fdt_setprop_cell(vms->fdt, "/intc/its", "phandle", vms->msi_phandle);
}
-static void fdt_add_v2m_gic_node(VirtBoardInfo *vbi)
+static void fdt_add_v2m_gic_node(VirtMachineState *vms)
{
- vbi->msi_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
- qemu_fdt_add_subnode(vbi->fdt, "/intc/v2m");
- qemu_fdt_setprop_string(vbi->fdt, "/intc/v2m", "compatible",
+ vms->msi_phandle = qemu_fdt_alloc_phandle(vms->fdt);
+ qemu_fdt_add_subnode(vms->fdt, "/intc/v2m");
+ qemu_fdt_setprop_string(vms->fdt, "/intc/v2m", "compatible",
"arm,gic-v2m-frame");
- qemu_fdt_setprop(vbi->fdt, "/intc/v2m", "msi-controller", NULL, 0);
- qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/v2m", "reg",
- 2, vbi->memmap[VIRT_GIC_V2M].base,
- 2, vbi->memmap[VIRT_GIC_V2M].size);
- qemu_fdt_setprop_cell(vbi->fdt, "/intc/v2m", "phandle", vbi->msi_phandle);
+ qemu_fdt_setprop(vms->fdt, "/intc/v2m", "msi-controller", NULL, 0);
+ qemu_fdt_setprop_sized_cells(vms->fdt, "/intc/v2m", "reg",
+ 2, vms->memmap[VIRT_GIC_V2M].base,
+ 2, vms->memmap[VIRT_GIC_V2M].size);
+ qemu_fdt_setprop_cell(vms->fdt, "/intc/v2m", "phandle", vms->msi_phandle);
}
-static void fdt_add_gic_node(VirtBoardInfo *vbi, int type)
+static void fdt_add_gic_node(VirtMachineState *vms)
{
- vbi->gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
- qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", vbi->gic_phandle);
-
- qemu_fdt_add_subnode(vbi->fdt, "/intc");
- qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#interrupt-cells", 3);
- qemu_fdt_setprop(vbi->fdt, "/intc", "interrupt-controller", NULL, 0);
- qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#address-cells", 0x2);
- qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#size-cells", 0x2);
- qemu_fdt_setprop(vbi->fdt, "/intc", "ranges", NULL, 0);
- if (type == 3) {
- qemu_fdt_setprop_string(vbi->fdt, "/intc", "compatible",
+ vms->gic_phandle = qemu_fdt_alloc_phandle(vms->fdt);
+ qemu_fdt_setprop_cell(vms->fdt, "/", "interrupt-parent", vms->gic_phandle);
+
+ qemu_fdt_add_subnode(vms->fdt, "/intc");
+ qemu_fdt_setprop_cell(vms->fdt, "/intc", "#interrupt-cells", 3);
+ qemu_fdt_setprop(vms->fdt, "/intc", "interrupt-controller", NULL, 0);
+ qemu_fdt_setprop_cell(vms->fdt, "/intc", "#address-cells", 0x2);
+ qemu_fdt_setprop_cell(vms->fdt, "/intc", "#size-cells", 0x2);
+ qemu_fdt_setprop(vms->fdt, "/intc", "ranges", NULL, 0);
+ if (vms->gic_version == 3) {
+ qemu_fdt_setprop_string(vms->fdt, "/intc", "compatible",
"arm,gic-v3");
- qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc", "reg",
- 2, vbi->memmap[VIRT_GIC_DIST].base,
- 2, vbi->memmap[VIRT_GIC_DIST].size,
- 2, vbi->memmap[VIRT_GIC_REDIST].base,
- 2, vbi->memmap[VIRT_GIC_REDIST].size);
+ qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg",
+ 2, vms->memmap[VIRT_GIC_DIST].base,
+ 2, vms->memmap[VIRT_GIC_DIST].size,
+ 2, vms->memmap[VIRT_GIC_REDIST].base,
+ 2, vms->memmap[VIRT_GIC_REDIST].size);
} else {
/* 'cortex-a15-gic' means 'GIC v2' */
- qemu_fdt_setprop_string(vbi->fdt, "/intc", "compatible",
+ qemu_fdt_setprop_string(vms->fdt, "/intc", "compatible",
"arm,cortex-a15-gic");
- qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc", "reg",
- 2, vbi->memmap[VIRT_GIC_DIST].base,
- 2, vbi->memmap[VIRT_GIC_DIST].size,
- 2, vbi->memmap[VIRT_GIC_CPU].base,
- 2, vbi->memmap[VIRT_GIC_CPU].size);
+ qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg",
+ 2, vms->memmap[VIRT_GIC_DIST].base,
+ 2, vms->memmap[VIRT_GIC_DIST].size,
+ 2, vms->memmap[VIRT_GIC_CPU].base,
+ 2, vms->memmap[VIRT_GIC_CPU].size);
}
- qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
+ qemu_fdt_setprop_cell(vms->fdt, "/intc", "phandle", vms->gic_phandle);
}
-static void fdt_add_pmu_nodes(const VirtBoardInfo *vbi, int gictype)
+static void fdt_add_pmu_nodes(const VirtMachineState *vms)
{
CPUState *cpu;
ARMCPU *armcpu;
@@ -497,24 +461,24 @@ static void fdt_add_pmu_nodes(const VirtBoardInfo *vbi, int gictype)
}
}
- if (gictype == 2) {
+ if (vms->gic_version == 2) {
irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
GIC_FDT_IRQ_PPI_CPU_WIDTH,
- (1 << vbi->smp_cpus) - 1);
+ (1 << vms->smp_cpus) - 1);
}
armcpu = ARM_CPU(qemu_get_cpu(0));
- qemu_fdt_add_subnode(vbi->fdt, "/pmu");
+ qemu_fdt_add_subnode(vms->fdt, "/pmu");
if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
const char compat[] = "arm,armv8-pmuv3";
- qemu_fdt_setprop(vbi->fdt, "/pmu", "compatible",
+ qemu_fdt_setprop(vms->fdt, "/pmu", "compatible",
compat, sizeof(compat));
- qemu_fdt_setprop_cells(vbi->fdt, "/pmu", "interrupts",
+ qemu_fdt_setprop_cells(vms->fdt, "/pmu", "interrupts",
GIC_FDT_IRQ_TYPE_PPI, VIRTUAL_PMU_IRQ, irqflags);
}
}
-static void create_its(VirtBoardInfo *vbi, DeviceState *gicdev)
+static void create_its(VirtMachineState *vms, DeviceState *gicdev)
{
const char *itsclass = its_class_name();
DeviceState *dev;
@@ -529,19 +493,19 @@ static void create_its(VirtBoardInfo *vbi, DeviceState *gicdev)
object_property_set_link(OBJECT(dev), OBJECT(gicdev), "parent-gicv3",
&error_abort);
qdev_init_nofail(dev);
- sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vbi->memmap[VIRT_GIC_ITS].base);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_ITS].base);
- fdt_add_its_gic_node(vbi);
+ fdt_add_its_gic_node(vms);
}
-static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_v2m(VirtMachineState *vms, qemu_irq *pic)
{
int i;
- int irq = vbi->irqmap[VIRT_GIC_V2M];
+ int irq = vms->irqmap[VIRT_GIC_V2M];
DeviceState *dev;
dev = qdev_create(NULL, "arm-gicv2m");
- sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vbi->memmap[VIRT_GIC_V2M].base);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_V2M].base);
qdev_prop_set_uint32(dev, "base-spi", irq);
qdev_prop_set_uint32(dev, "num-spi", NUM_GICV2M_SPIS);
qdev_init_nofail(dev);
@@ -550,17 +514,17 @@ static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
}
- fdt_add_v2m_gic_node(vbi);
+ fdt_add_v2m_gic_node(vms);
}
-static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type,
- bool secure, bool no_its)
+static void create_gic(VirtMachineState *vms, qemu_irq *pic)
{
/* We create a standalone GIC */
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
DeviceState *gicdev;
SysBusDevice *gicbusdev;
const char *gictype;
- int i;
+ int type = vms->gic_version, i;
gictype = (type == 3) ? gicv3_class_name() : gic_class_name();
@@ -572,15 +536,15 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type,
*/
qdev_prop_set_uint32(gicdev, "num-irq", NUM_IRQS + 32);
if (!kvm_irqchip_in_kernel()) {
- qdev_prop_set_bit(gicdev, "has-security-extensions", secure);
+ qdev_prop_set_bit(gicdev, "has-security-extensions", vms->secure);
}
qdev_init_nofail(gicdev);
gicbusdev = SYS_BUS_DEVICE(gicdev);
- sysbus_mmio_map(gicbusdev, 0, vbi->memmap[VIRT_GIC_DIST].base);
+ sysbus_mmio_map(gicbusdev, 0, vms->memmap[VIRT_GIC_DIST].base);
if (type == 3) {
- sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_REDIST].base);
+ sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_REDIST].base);
} else {
- sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_CPU].base);
+ sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_CPU].base);
}
/* Wire the outputs from each CPU's generic timer to the
@@ -616,22 +580,22 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type,
pic[i] = qdev_get_gpio_in(gicdev, i);
}
- fdt_add_gic_node(vbi, type);
+ fdt_add_gic_node(vms);
- if (type == 3 && !no_its) {
- create_its(vbi, gicdev);
+ if (type == 3 && !vmc->no_its) {
+ create_its(vms, gicdev);
} else if (type == 2) {
- create_v2m(vbi, pic);
+ create_v2m(vms, pic);
}
}
-static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic, int uart,
+static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart,
MemoryRegion *mem, CharDriverState *chr)
{
char *nodename;
- hwaddr base = vbi->memmap[uart].base;
- hwaddr size = vbi->memmap[uart].size;
- int irq = vbi->irqmap[uart];
+ hwaddr base = vms->memmap[uart].base;
+ hwaddr size = vms->memmap[uart].size;
+ int irq = vms->irqmap[uart];
const char compat[] = "arm,pl011\0arm,primecell";
const char clocknames[] = "uartclk\0apb_pclk";
DeviceState *dev = qdev_create(NULL, "pl011");
@@ -644,51 +608,51 @@ static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic, int uart,
sysbus_connect_irq(s, 0, pic[irq]);
nodename = g_strdup_printf("/pl011@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
+ qemu_fdt_add_subnode(vms->fdt, nodename);
/* Note that we can't use setprop_string because of the embedded NUL */
- qemu_fdt_setprop(vbi->fdt, nodename, "compatible",
+ qemu_fdt_setprop(vms->fdt, nodename, "compatible",
compat, sizeof(compat));
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base, 2, size);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
GIC_FDT_IRQ_TYPE_SPI, irq,
GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "clocks",
- vbi->clock_phandle, vbi->clock_phandle);
- qemu_fdt_setprop(vbi->fdt, nodename, "clock-names",
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "clocks",
+ vms->clock_phandle, vms->clock_phandle);
+ qemu_fdt_setprop(vms->fdt, nodename, "clock-names",
clocknames, sizeof(clocknames));
if (uart == VIRT_UART) {
- qemu_fdt_setprop_string(vbi->fdt, "/chosen", "stdout-path", nodename);
+ qemu_fdt_setprop_string(vms->fdt, "/chosen", "stdout-path", nodename);
} else {
/* Mark as not usable by the normal world */
- qemu_fdt_setprop_string(vbi->fdt, nodename, "status", "disabled");
- qemu_fdt_setprop_string(vbi->fdt, nodename, "secure-status", "okay");
+ qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
+ qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
}
g_free(nodename);
}
-static void create_rtc(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_rtc(const VirtMachineState *vms, qemu_irq *pic)
{
char *nodename;
- hwaddr base = vbi->memmap[VIRT_RTC].base;
- hwaddr size = vbi->memmap[VIRT_RTC].size;
- int irq = vbi->irqmap[VIRT_RTC];
+ hwaddr base = vms->memmap[VIRT_RTC].base;
+ hwaddr size = vms->memmap[VIRT_RTC].size;
+ int irq = vms->irqmap[VIRT_RTC];
const char compat[] = "arm,pl031\0arm,primecell";
sysbus_create_simple("pl031", base, pic[irq]);
nodename = g_strdup_printf("/pl031@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop(vbi->fdt, nodename, "compatible", compat, sizeof(compat));
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop(vms->fdt, nodename, "compatible", compat, sizeof(compat));
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base, 2, size);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
GIC_FDT_IRQ_TYPE_SPI, irq,
GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "clocks", vbi->clock_phandle);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "clock-names", "apb_pclk");
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "clocks", vms->clock_phandle);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "clock-names", "apb_pclk");
g_free(nodename);
}
@@ -703,45 +667,45 @@ static Notifier virt_system_powerdown_notifier = {
.notify = virt_powerdown_req
};
-static void create_gpio(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_gpio(const VirtMachineState *vms, qemu_irq *pic)
{
char *nodename;
DeviceState *pl061_dev;
- hwaddr base = vbi->memmap[VIRT_GPIO].base;
- hwaddr size = vbi->memmap[VIRT_GPIO].size;
- int irq = vbi->irqmap[VIRT_GPIO];
+ hwaddr base = vms->memmap[VIRT_GPIO].base;
+ hwaddr size = vms->memmap[VIRT_GPIO].size;
+ int irq = vms->irqmap[VIRT_GPIO];
const char compat[] = "arm,pl061\0arm,primecell";
pl061_dev = sysbus_create_simple("pl061", base, pic[irq]);
- uint32_t phandle = qemu_fdt_alloc_phandle(vbi->fdt);
+ uint32_t phandle = qemu_fdt_alloc_phandle(vms->fdt);
nodename = g_strdup_printf("/pl061@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base, 2, size);
- qemu_fdt_setprop(vbi->fdt, nodename, "compatible", compat, sizeof(compat));
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "#gpio-cells", 2);
- qemu_fdt_setprop(vbi->fdt, nodename, "gpio-controller", NULL, 0);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+ qemu_fdt_setprop(vms->fdt, nodename, "compatible", compat, sizeof(compat));
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "#gpio-cells", 2);
+ qemu_fdt_setprop(vms->fdt, nodename, "gpio-controller", NULL, 0);
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
GIC_FDT_IRQ_TYPE_SPI, irq,
GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "clocks", vbi->clock_phandle);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "clock-names", "apb_pclk");
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "phandle", phandle);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "clocks", vms->clock_phandle);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "clock-names", "apb_pclk");
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", phandle);
gpio_key_dev = sysbus_create_simple("gpio-key", -1,
qdev_get_gpio_in(pl061_dev, 3));
- qemu_fdt_add_subnode(vbi->fdt, "/gpio-keys");
- qemu_fdt_setprop_string(vbi->fdt, "/gpio-keys", "compatible", "gpio-keys");
- qemu_fdt_setprop_cell(vbi->fdt, "/gpio-keys", "#size-cells", 0);
- qemu_fdt_setprop_cell(vbi->fdt, "/gpio-keys", "#address-cells", 1);
+ qemu_fdt_add_subnode(vms->fdt, "/gpio-keys");
+ qemu_fdt_setprop_string(vms->fdt, "/gpio-keys", "compatible", "gpio-keys");
+ qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys", "#size-cells", 0);
+ qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys", "#address-cells", 1);
- qemu_fdt_add_subnode(vbi->fdt, "/gpio-keys/poweroff");
- qemu_fdt_setprop_string(vbi->fdt, "/gpio-keys/poweroff",
+ qemu_fdt_add_subnode(vms->fdt, "/gpio-keys/poweroff");
+ qemu_fdt_setprop_string(vms->fdt, "/gpio-keys/poweroff",
"label", "GPIO Key Poweroff");
- qemu_fdt_setprop_cell(vbi->fdt, "/gpio-keys/poweroff", "linux,code",
+ qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys/poweroff", "linux,code",
KEY_POWER);
- qemu_fdt_setprop_cells(vbi->fdt, "/gpio-keys/poweroff",
+ qemu_fdt_setprop_cells(vms->fdt, "/gpio-keys/poweroff",
"gpios", phandle, 3, 0);
/* connect powerdown request */
@@ -750,10 +714,10 @@ static void create_gpio(const VirtBoardInfo *vbi, qemu_irq *pic)
g_free(nodename);
}
-static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_virtio_devices(const VirtMachineState *vms, qemu_irq *pic)
{
int i;
- hwaddr size = vbi->memmap[VIRT_MMIO].size;
+ hwaddr size = vms->memmap[VIRT_MMIO].size;
/* We create the transports in forwards order. Since qbus_realize()
* prepends (not appends) new child buses, the incrementing loop below will
@@ -783,8 +747,8 @@ static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
* of disks users must use UUIDs or similar mechanisms.
*/
for (i = 0; i < NUM_VIRTIO_TRANSPORTS; i++) {
- int irq = vbi->irqmap[VIRT_MMIO] + i;
- hwaddr base = vbi->memmap[VIRT_MMIO].base + i * size;
+ int irq = vms->irqmap[VIRT_MMIO] + i;
+ hwaddr base = vms->memmap[VIRT_MMIO].base + i * size;
sysbus_create_simple("virtio-mmio", base, pic[irq]);
}
@@ -798,16 +762,16 @@ static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
*/
for (i = NUM_VIRTIO_TRANSPORTS - 1; i >= 0; i--) {
char *nodename;
- int irq = vbi->irqmap[VIRT_MMIO] + i;
- hwaddr base = vbi->memmap[VIRT_MMIO].base + i * size;
+ int irq = vms->irqmap[VIRT_MMIO] + i;
+ hwaddr base = vms->memmap[VIRT_MMIO].base + i * size;
nodename = g_strdup_printf("/virtio_mmio@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename,
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename,
"compatible", "virtio,mmio");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base, 2, size);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
GIC_FDT_IRQ_TYPE_SPI, irq,
GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
g_free(nodename);
@@ -870,7 +834,7 @@ static void create_one_flash(const char *name, hwaddr flashbase,
}
}
-static void create_flash(const VirtBoardInfo *vbi,
+static void create_flash(const VirtMachineState *vms,
MemoryRegion *sysmem,
MemoryRegion *secure_sysmem)
{
@@ -882,8 +846,8 @@ static void create_flash(const VirtBoardInfo *vbi,
* If sysmem == secure_sysmem this means there is no separate Secure
* address space and both flash devices are generally visible.
*/
- hwaddr flashsize = vbi->memmap[VIRT_FLASH].size / 2;
- hwaddr flashbase = vbi->memmap[VIRT_FLASH].base;
+ hwaddr flashsize = vms->memmap[VIRT_FLASH].size / 2;
+ hwaddr flashbase = vms->memmap[VIRT_FLASH].base;
char *nodename;
create_one_flash("virt.flash0", flashbase, flashsize,
@@ -894,41 +858,41 @@ static void create_flash(const VirtBoardInfo *vbi,
if (sysmem == secure_sysmem) {
/* Report both flash devices as a single node in the DT */
nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, flashbase, 2, flashsize,
2, flashbase + flashsize, 2, flashsize);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
g_free(nodename);
} else {
/* Report the devices as separate nodes so we can mark one as
* only visible to the secure world.
*/
nodename = g_strdup_printf("/secflash@%" PRIx64, flashbase);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, flashbase, 2, flashsize);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "status", "disabled");
- qemu_fdt_setprop_string(vbi->fdt, nodename, "secure-status", "okay");
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
+ qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
g_free(nodename);
nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, flashbase + flashsize, 2, flashsize);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
g_free(nodename);
}
}
-static void create_fw_cfg(const VirtBoardInfo *vbi, AddressSpace *as)
+static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as)
{
- hwaddr base = vbi->memmap[VIRT_FW_CFG].base;
- hwaddr size = vbi->memmap[VIRT_FW_CFG].size;
+ hwaddr base = vms->memmap[VIRT_FW_CFG].base;
+ hwaddr size = vms->memmap[VIRT_FW_CFG].size;
FWCfgState *fw_cfg;
char *nodename;
@@ -936,15 +900,17 @@ static void create_fw_cfg(const VirtBoardInfo *vbi, AddressSpace *as)
fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename,
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename,
"compatible", "qemu,fw-cfg-mmio");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base, 2, size);
g_free(nodename);
+ return fw_cfg;
}
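
Returning the FWCfgState pointer lets the board cache it for later consumers; both the SMBIOS and ACPI code in this patch test vms->fw_cfg. An illustrative call (the address-space argument is assumed):

    vms->fw_cfg = create_fw_cfg(vms, &address_space_memory);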
-static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
+static void create_pcie_irq_map(const VirtMachineState *vms,
+ uint32_t gic_phandle,
int first_irq, const char *nodename)
{
int devfn, pin;
@@ -971,28 +937,27 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
}
}
- qemu_fdt_setprop(vbi->fdt, nodename, "interrupt-map",
+ qemu_fdt_setprop(vms->fdt, nodename, "interrupt-map",
full_irq_map, sizeof(full_irq_map));
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupt-map-mask",
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupt-map-mask",
0x1800, 0, 0, /* devfn (PCI_SLOT(3)) */
0x7 /* PCI irq */);
}
-static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
- bool use_highmem)
+static void create_pcie(const VirtMachineState *vms, qemu_irq *pic)
{
- hwaddr base_mmio = vbi->memmap[VIRT_PCIE_MMIO].base;
- hwaddr size_mmio = vbi->memmap[VIRT_PCIE_MMIO].size;
- hwaddr base_mmio_high = vbi->memmap[VIRT_PCIE_MMIO_HIGH].base;
- hwaddr size_mmio_high = vbi->memmap[VIRT_PCIE_MMIO_HIGH].size;
- hwaddr base_pio = vbi->memmap[VIRT_PCIE_PIO].base;
- hwaddr size_pio = vbi->memmap[VIRT_PCIE_PIO].size;
- hwaddr base_ecam = vbi->memmap[VIRT_PCIE_ECAM].base;
- hwaddr size_ecam = vbi->memmap[VIRT_PCIE_ECAM].size;
+ hwaddr base_mmio = vms->memmap[VIRT_PCIE_MMIO].base;
+ hwaddr size_mmio = vms->memmap[VIRT_PCIE_MMIO].size;
+ hwaddr base_mmio_high = vms->memmap[VIRT_PCIE_MMIO_HIGH].base;
+ hwaddr size_mmio_high = vms->memmap[VIRT_PCIE_MMIO_HIGH].size;
+ hwaddr base_pio = vms->memmap[VIRT_PCIE_PIO].base;
+ hwaddr size_pio = vms->memmap[VIRT_PCIE_PIO].size;
+ hwaddr base_ecam = vms->memmap[VIRT_PCIE_ECAM].base;
+ hwaddr size_ecam = vms->memmap[VIRT_PCIE_ECAM].size;
hwaddr base = base_mmio;
int nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN;
- int irq = vbi->irqmap[VIRT_PCIE];
+ int irq = vms->irqmap[VIRT_PCIE];
MemoryRegion *mmio_alias;
MemoryRegion *mmio_reg;
MemoryRegion *ecam_alias;
@@ -1023,7 +988,7 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
mmio_reg, base_mmio, size_mmio);
memory_region_add_subregion(get_system_memory(), base_mmio, mmio_alias);
- if (use_highmem) {
+ if (vms->highmem) {
/* Map high MMIO space */
MemoryRegion *high_mmio_alias = g_new0(MemoryRegion, 1);
@@ -1054,26 +1019,26 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
}
nodename = g_strdup_printf("/pcie@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename,
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename,
"compatible", "pci-host-ecam-generic");
- qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "pci");
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "#address-cells", 3);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "#size-cells", 2);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0,
+ qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "pci");
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "#address-cells", 3);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "#size-cells", 2);
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "bus-range", 0,
nr_pcie_buses - 1);
- qemu_fdt_setprop(vbi->fdt, nodename, "dma-coherent", NULL, 0);
+ qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);
- if (vbi->msi_phandle) {
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "msi-parent",
- vbi->msi_phandle);
+ if (vms->msi_phandle) {
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "msi-parent",
+ vms->msi_phandle);
}
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base_ecam, 2, size_ecam);
- if (use_highmem) {
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
+ if (vms->highmem) {
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "ranges",
1, FDT_PCI_RANGE_IOPORT, 2, 0,
2, base_pio, 2, size_pio,
1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
@@ -1082,20 +1047,20 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
2, base_mmio_high,
2, base_mmio_high, 2, size_mmio_high);
} else {
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "ranges",
1, FDT_PCI_RANGE_IOPORT, 2, 0,
2, base_pio, 2, size_pio,
1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
2, base_mmio, 2, size_mmio);
}
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);
- create_pcie_irq_map(vbi, vbi->gic_phandle, irq, nodename);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "#interrupt-cells", 1);
+ create_pcie_irq_map(vms, vms->gic_phandle, irq, nodename);
g_free(nodename);
}
-static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic)
{
DeviceState *dev;
SysBusDevice *s;
@@ -1103,13 +1068,13 @@ static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
ARMPlatformBusFDTParams *fdt_params = g_new(ARMPlatformBusFDTParams, 1);
MemoryRegion *sysmem = get_system_memory();
- platform_bus_params.platform_bus_base = vbi->memmap[VIRT_PLATFORM_BUS].base;
- platform_bus_params.platform_bus_size = vbi->memmap[VIRT_PLATFORM_BUS].size;
- platform_bus_params.platform_bus_first_irq = vbi->irqmap[VIRT_PLATFORM_BUS];
+ platform_bus_params.platform_bus_base = vms->memmap[VIRT_PLATFORM_BUS].base;
+ platform_bus_params.platform_bus_size = vms->memmap[VIRT_PLATFORM_BUS].size;
+ platform_bus_params.platform_bus_first_irq = vms->irqmap[VIRT_PLATFORM_BUS];
platform_bus_params.platform_bus_num_irqs = PLATFORM_BUS_NUM_IRQS;
fdt_params->system_params = &platform_bus_params;
- fdt_params->binfo = &vbi->bootinfo;
+ fdt_params->binfo = &vms->bootinfo;
fdt_params->intc = "/intc";
/*
* register a machine init done notifier that creates the device tree
@@ -1136,43 +1101,44 @@ static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
sysbus_mmio_get_region(s, 0));
}
-static void create_secure_ram(VirtBoardInfo *vbi, MemoryRegion *secure_sysmem)
+static void create_secure_ram(VirtMachineState *vms,
+ MemoryRegion *secure_sysmem)
{
MemoryRegion *secram = g_new(MemoryRegion, 1);
char *nodename;
- hwaddr base = vbi->memmap[VIRT_SECURE_MEM].base;
- hwaddr size = vbi->memmap[VIRT_SECURE_MEM].size;
+ hwaddr base = vms->memmap[VIRT_SECURE_MEM].base;
+ hwaddr size = vms->memmap[VIRT_SECURE_MEM].size;
memory_region_init_ram(secram, NULL, "virt.secure-ram", size, &error_fatal);
vmstate_register_ram_global(secram);
memory_region_add_subregion(secure_sysmem, base, secram);
nodename = g_strdup_printf("/secram@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "memory");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg", 2, base, 2, size);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "status", "disabled");
- qemu_fdt_setprop_string(vbi->fdt, nodename, "secure-status", "okay");
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "memory");
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg", 2, base, 2, size);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
+ qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
g_free(nodename);
}
static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
{
- const VirtBoardInfo *board = (const VirtBoardInfo *)binfo;
+ const VirtMachineState *board = container_of(binfo, VirtMachineState,
+ bootinfo);
*fdt_size = board->fdt_size;
return board->fdt;
}
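
The conversion just above relies on container_of() to recover the enclosing VirtMachineState from its embedded arm_boot_info member. A minimal sketch of the pattern, using hypothetical outer/inner names rather than the types from this patch:

#include <stddef.h>

struct inner { int x; };
struct outer {
    int id;
    struct inner member;        /* embedded by value, not a pointer */
};

/* Same idea as QEMU's container_of(): step back from the member's
 * address by that member's offset within the enclosing type. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

static struct outer *outer_from_inner(struct inner *i)
{
    return container_of(i, struct outer, member);
}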
-static void virt_build_smbios(VirtGuestInfo *guest_info)
+static void virt_build_smbios(VirtMachineState *vms)
{
- FWCfgState *fw_cfg = guest_info->fw_cfg;
uint8_t *smbios_tables, *smbios_anchor;
size_t smbios_tables_len, smbios_anchor_len;
const char *product = "QEMU Virtual Machine";
- if (!fw_cfg) {
+ if (!vms->fw_cfg) {
return;
}
@@ -1187,20 +1153,21 @@ static void virt_build_smbios(VirtGuestInfo *guest_info)
&smbios_anchor, &smbios_anchor_len);
if (smbios_anchor) {
- fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables",
+ fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-tables",
smbios_tables, smbios_tables_len);
- fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor",
+ fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-anchor",
smbios_anchor, smbios_anchor_len);
}
}
static
-void virt_guest_info_machine_done(Notifier *notifier, void *data)
+void virt_machine_done(Notifier *notifier, void *data)
{
- VirtGuestInfoState *guest_info_state = container_of(notifier,
- VirtGuestInfoState, machine_done);
- virt_acpi_setup(&guest_info_state->info);
- virt_build_smbios(&guest_info_state->info);
+ VirtMachineState *vms = container_of(notifier, VirtMachineState,
+ machine_done);
+
+ virt_acpi_setup(vms);
+ virt_build_smbios(vms);
}
static void machvirt_init(MachineState *machine)
@@ -1210,13 +1177,9 @@ static void machvirt_init(MachineState *machine)
qemu_irq pic[NUM_IRQS];
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *secure_sysmem = NULL;
- int gic_version = vms->gic_version;
int n, virt_max_cpus;
MemoryRegion *ram = g_new(MemoryRegion, 1);
const char *cpu_model = machine->cpu_model;
- VirtBoardInfo *vbi;
- VirtGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
- VirtGuestInfo *guest_info = &guest_info_state->info;
char **cpustr;
ObjectClass *oc;
const char *typename;
@@ -1232,14 +1195,14 @@ static void machvirt_init(MachineState *machine)
/* We can probe only here because during property set
* KVM is not available yet
*/
- if (!gic_version) {
+ if (!vms->gic_version) {
if (!kvm_enabled()) {
error_report("gic-version=host requires KVM");
exit(1);
}
- gic_version = kvm_arm_vgic_probe();
- if (!gic_version) {
+ vms->gic_version = kvm_arm_vgic_probe();
+ if (!vms->gic_version) {
error_report("Unable to determine GIC version supported by host");
exit(1);
}
@@ -1248,9 +1211,7 @@ static void machvirt_init(MachineState *machine)
/* Separate the actual CPU model name from any appended features */
cpustr = g_strsplit(cpu_model, ",", 2);
- vbi = find_machine_info(cpustr[0]);
-
- if (!vbi) {
+ if (!cpuname_valid(cpustr[0])) {
error_report("mach-virt: CPU %s not supported", cpustr[0]);
exit(1);
}
@@ -1262,13 +1223,13 @@ static void machvirt_init(MachineState *machine)
* let the boot ROM sort them out.
* The usual case is that we do use QEMU's PSCI implementation.
*/
- vbi->using_psci = !(vms->secure && firmware_loaded);
+ vms->using_psci = !(vms->secure && firmware_loaded);
/* The maximum number of CPUs depends on the GIC version, or on how
* many redistributors we can fit into the memory map.
*/
- if (gic_version == 3) {
- virt_max_cpus = vbi->memmap[VIRT_GIC_REDIST].size / 0x20000;
+ if (vms->gic_version == 3) {
+ virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / 0x20000;
clustersz = GICV3_TARGETLIST_BITS;
} else {
virt_max_cpus = GIC_NCPU;
@@ -1282,9 +1243,9 @@ static void machvirt_init(MachineState *machine)
exit(1);
}
- vbi->smp_cpus = smp_cpus;
+ vms->smp_cpus = smp_cpus;
- if (machine->ram_size > vbi->memmap[VIRT_MEM].size) {
+ if (machine->ram_size > vms->memmap[VIRT_MEM].size) {
error_report("mach-virt: cannot model more than %dGB RAM", RAMLIMIT_GB);
exit(1);
}
@@ -1306,7 +1267,7 @@ static void machvirt_init(MachineState *machine)
memory_region_add_subregion_overlap(secure_sysmem, 0, sysmem, -1);
}
- create_fdt(vbi);
+ create_fdt(vms);
oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]);
if (!oc) {
@@ -1345,7 +1306,7 @@ static void machvirt_init(MachineState *machine)
object_property_set_bool(cpuobj, false, "has_el3", NULL);
}
- if (vbi->using_psci) {
+ if (vms->using_psci) {
object_property_set_int(cpuobj, QEMU_PSCI_CONDUIT_HVC,
"psci-conduit", NULL);
@@ -1361,7 +1322,7 @@ static void machvirt_init(MachineState *machine)
}
if (object_property_find(cpuobj, "reset-cbar", NULL)) {
- object_property_set_int(cpuobj, vbi->memmap[VIRT_CPUPERIPHS].base,
+ object_property_set_int(cpuobj, vms->memmap[VIRT_CPUPERIPHS].base,
"reset-cbar", &error_abort);
}
@@ -1374,62 +1335,55 @@ static void machvirt_init(MachineState *machine)
object_property_set_bool(cpuobj, true, "realized", NULL);
}
- fdt_add_timer_nodes(vbi, gic_version);
- fdt_add_cpu_nodes(vbi);
- fdt_add_psci_node(vbi);
+ fdt_add_timer_nodes(vms);
+ fdt_add_cpu_nodes(vms);
+ fdt_add_psci_node(vms);
memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
machine->ram_size);
- memory_region_add_subregion(sysmem, vbi->memmap[VIRT_MEM].base, ram);
+ memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, ram);
- create_flash(vbi, sysmem, secure_sysmem ? secure_sysmem : sysmem);
+ create_flash(vms, sysmem, secure_sysmem ? secure_sysmem : sysmem);
- create_gic(vbi, pic, gic_version, vms->secure, vmc->no_its);
+ create_gic(vms, pic);
- fdt_add_pmu_nodes(vbi, gic_version);
+ fdt_add_pmu_nodes(vms);
- create_uart(vbi, pic, VIRT_UART, sysmem, serial_hds[0]);
+ create_uart(vms, pic, VIRT_UART, sysmem, serial_hds[0]);
if (vms->secure) {
- create_secure_ram(vbi, secure_sysmem);
- create_uart(vbi, pic, VIRT_SECURE_UART, secure_sysmem, serial_hds[1]);
+ create_secure_ram(vms, secure_sysmem);
+ create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hds[1]);
}
- create_rtc(vbi, pic);
+ create_rtc(vms, pic);
- create_pcie(vbi, pic, vms->highmem);
+ create_pcie(vms, pic);
- create_gpio(vbi, pic);
+ create_gpio(vms, pic);
/* Create mmio transports, so the user can create virtio backends
* (which will be automatically plugged in to the transports). If
* no backend is created the transport will just sit harmlessly idle.
*/
- create_virtio_devices(vbi, pic);
-
- create_fw_cfg(vbi, &address_space_memory);
- rom_set_fw(fw_cfg_find());
-
- guest_info->smp_cpus = smp_cpus;
- guest_info->fw_cfg = fw_cfg_find();
- guest_info->memmap = vbi->memmap;
- guest_info->irqmap = vbi->irqmap;
- guest_info->use_highmem = vms->highmem;
- guest_info->gic_version = gic_version;
- guest_info->no_its = vmc->no_its;
- guest_info_state->machine_done.notify = virt_guest_info_machine_done;
- qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);
-
- vbi->bootinfo.ram_size = machine->ram_size;
- vbi->bootinfo.kernel_filename = machine->kernel_filename;
- vbi->bootinfo.kernel_cmdline = machine->kernel_cmdline;
- vbi->bootinfo.initrd_filename = machine->initrd_filename;
- vbi->bootinfo.nb_cpus = smp_cpus;
- vbi->bootinfo.board_id = -1;
- vbi->bootinfo.loader_start = vbi->memmap[VIRT_MEM].base;
- vbi->bootinfo.get_dtb = machvirt_dtb;
- vbi->bootinfo.firmware_loaded = firmware_loaded;
- arm_load_kernel(ARM_CPU(first_cpu), &vbi->bootinfo);
+ create_virtio_devices(vms, pic);
+
+ vms->fw_cfg = create_fw_cfg(vms, &address_space_memory);
+ rom_set_fw(vms->fw_cfg);
+
+ vms->machine_done.notify = virt_machine_done;
+ qemu_add_machine_init_done_notifier(&vms->machine_done);
+
+ vms->bootinfo.ram_size = machine->ram_size;
+ vms->bootinfo.kernel_filename = machine->kernel_filename;
+ vms->bootinfo.kernel_cmdline = machine->kernel_cmdline;
+ vms->bootinfo.initrd_filename = machine->initrd_filename;
+ vms->bootinfo.nb_cpus = smp_cpus;
+ vms->bootinfo.board_id = -1;
+ vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
+ vms->bootinfo.get_dtb = machvirt_dtb;
+ vms->bootinfo.firmware_loaded = firmware_loaded;
+ arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo);
/*
* arm_load_kernel machine init done notifier registration must
@@ -1437,7 +1391,7 @@ static void machvirt_init(MachineState *machine)
* another notifier is registered which adds platform bus nodes.
* Notifiers are executed in registration reverse order.
*/
- create_platform_bus(vbi, pic);
+ create_platform_bus(vms, pic);
}
static bool virt_get_secure(Object *obj, Error **errp)
@@ -1556,6 +1510,9 @@ static void virt_2_9_instance_init(Object *obj)
object_property_set_description(obj, "gic-version",
"Set GIC version. "
"Valid values are 2, 3 and host", NULL);
+
+ vms->memmap = a15memmap;
+ vms->irqmap = a15irqmap;
}
static void virt_machine_2_9_options(MachineClass *mc)
@@ -1573,8 +1530,14 @@ static void virt_2_8_instance_init(Object *obj)
static void virt_machine_2_8_options(MachineClass *mc)
{
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
virt_machine_2_9_options(mc);
SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_8);
+ /* For 2.8 and earlier we falsely claimed in the DT that
+ * our timers were edge-triggered, not level-triggered.
+ */
+ vmc->claim_edge_triggered_timers = true;
}
DEFINE_VIRT_MACHINE(2, 8)
diff --git a/hw/arm/z2.c b/hw/arm/z2.c
index b3a6bbd210..1607cbdb03 100644
--- a/hw/arm/z2.c
+++ b/hw/arm/z2.c
@@ -220,7 +220,7 @@ static int aer915_send(I2CSlave *i2c, uint8_t data)
return 0;
}
-static void aer915_event(I2CSlave *i2c, enum i2c_event event)
+static int aer915_event(I2CSlave *i2c, enum i2c_event event)
{
AER915State *s = AER915(i2c);
@@ -238,6 +238,8 @@ static void aer915_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
static int aer915_recv(I2CSlave *slave)
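
This is the first of many identical conversions in this series: I2CSlaveClass::event now returns int, so a slave can refuse (NAK) a transfer instead of being forced to accept it. A hedged sketch of a handler under the new contract; MyDeviceState, MY_DEVICE() and my_device_ready() are hypothetical:

static int my_i2c_event(I2CSlave *i2c, enum i2c_event event)
{
    MyDeviceState *s = MY_DEVICE(i2c);

    switch (event) {
    case I2C_START_SEND:
    case I2C_START_RECV:
        if (!my_device_ready(s)) {
            return -1;      /* NAK: i2c_start_transfer() reports failure */
        }
        break;
    default:
        break;
    }

    return 0;               /* ACK, which is all these converted handlers do */
}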
diff --git a/hw/audio/wm8750.c b/hw/audio/wm8750.c
index 0c6500e96a..f8b5bebfc2 100644
--- a/hw/audio/wm8750.c
+++ b/hw/audio/wm8750.c
@@ -303,7 +303,7 @@ static void wm8750_reset(I2CSlave *i2c)
s->i2c_len = 0;
}
-static void wm8750_event(I2CSlave *i2c, enum i2c_event event)
+static int wm8750_event(I2CSlave *i2c, enum i2c_event event)
{
WM8750State *s = WM8750(i2c);
@@ -321,6 +321,8 @@ static void wm8750_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
#define WM8750_LINVOL 0x00
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index e3c1166ea6..4c5f8c3590 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -28,6 +28,7 @@
#include "hw/ssi/ssi.h"
#include "qemu/bitops.h"
#include "qemu/log.h"
+#include "qemu/error-report.h"
#include "qapi/error.h"
#ifndef M25P80_ERR_DEBUG
@@ -377,6 +378,8 @@ typedef enum {
MAN_GENERIC,
} Manufacturer;
+#define M25P80_INTERNAL_DATA_BUFFER_SZ 16
+
typedef struct Flash {
SSISlave parent_obj;
@@ -387,7 +390,7 @@ typedef struct Flash {
int page_size;
uint8_t state;
- uint8_t data[16];
+ uint8_t data[M25P80_INTERNAL_DATA_BUFFER_SZ];
uint32_t len;
uint32_t pos;
uint8_t needed_bytes;
@@ -1115,6 +1118,17 @@ static uint32_t m25p80_transfer8(SSISlave *ss, uint32_t tx)
case STATE_COLLECTING_DATA:
case STATE_COLLECTING_VAR_LEN_DATA:
+
+ if (s->len >= M25P80_INTERNAL_DATA_BUFFER_SZ) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "M25P80: Write overrun internal data buffer. "
+ "SPI controller (QEMU emulator or guest driver) "
+ "is misbehaving\n");
+ s->len = s->pos = 0;
+ s->state = STATE_IDLE;
+ break;
+ }
+
s->data[s->len] = (uint8_t)tx;
s->len++;
@@ -1124,6 +1138,17 @@ static uint32_t m25p80_transfer8(SSISlave *ss, uint32_t tx)
break;
case STATE_READING_DATA:
+
+ if (s->pos >= M25P80_INTERNAL_DATA_BUFFER_SZ) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "M25P80: Read overrun internal data buffer. "
+ "SPI controller (QEMU emulator or guest driver) "
+ "is misbehaving\n");
+ s->len = s->pos = 0;
+ s->state = STATE_IDLE;
+ break;
+ }
+
r = s->data[s->pos];
s->pos++;
if (s->pos == s->len) {
@@ -1196,7 +1221,7 @@ static const VMStateDescription vmstate_m25p80 = {
.pre_save = m25p80_pre_save,
.fields = (VMStateField[]) {
VMSTATE_UINT8(state, Flash),
- VMSTATE_UINT8_ARRAY(data, Flash, 16),
+ VMSTATE_UINT8_ARRAY(data, Flash, M25P80_INTERNAL_DATA_BUFFER_SZ),
VMSTATE_UINT32(len, Flash),
VMSTATE_UINT32(pos, Flash),
VMSTATE_UINT8(needed_bytes, Flash),
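
Both hunks above apply the same defensive pattern: a device-driven index into a fixed internal buffer is validated before use, and an attempted overrun resets the device to a safe state instead of corrupting memory. A generic, self-contained sketch of the pattern (names illustrative):

#include <stdbool.h>
#include <stdint.h>

#define BUF_SZ 16

typedef struct {
    uint8_t data[BUF_SZ];
    uint32_t len;
    uint32_t pos;
    int state;                  /* 0 == idle */
} Dev;

static bool dev_push(Dev *d, uint8_t byte)
{
    if (d->len >= BUF_SZ) {     /* would overrun: reset to a safe state */
        d->len = d->pos = 0;
        d->state = 0;
        return false;
    }
    d->data[d->len++] = byte;
    return true;
}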
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 50bb0cbb93..702eda863e 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -863,7 +863,7 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
}
}
- req = qemu_get_virtqueue_element(f, sizeof(VirtIOBlockReq));
+ req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq));
virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req);
req->next = s->rq;
s->rq = req;
diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c
index 571c324004..820d1abeb9 100644
--- a/hw/char/exynos4210_uart.c
+++ b/hw/char/exynos4210_uart.c
@@ -629,22 +629,26 @@ DeviceState *exynos4210_uart_create(hwaddr addr,
return dev;
}
-static int exynos4210_uart_init(SysBusDevice *dev)
+static void exynos4210_uart_init(Object *obj)
{
+ SysBusDevice *dev = SYS_BUS_DEVICE(obj);
Exynos4210UartState *s = EXYNOS4210_UART(dev);
/* memory mapping */
- memory_region_init_io(&s->iomem, OBJECT(s), &exynos4210_uart_ops, s,
+ memory_region_init_io(&s->iomem, obj, &exynos4210_uart_ops, s,
"exynos4210.uart", EXYNOS4210_UART_REGS_MEM_SIZE);
sysbus_init_mmio(dev, &s->iomem);
sysbus_init_irq(dev, &s->irq);
+}
+
+static void exynos4210_uart_realize(DeviceState *dev, Error **errp)
+{
+ Exynos4210UartState *s = EXYNOS4210_UART(dev);
qemu_chr_fe_set_handlers(&s->chr, exynos4210_uart_can_receive,
exynos4210_uart_receive, exynos4210_uart_event,
s, NULL, true);
-
- return 0;
}
static Property exynos4210_uart_properties[] = {
@@ -658,9 +662,8 @@ static Property exynos4210_uart_properties[] = {
static void exynos4210_uart_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
- k->init = exynos4210_uart_init;
+ dc->realize = exynos4210_uart_realize;
dc->reset = exynos4210_uart_reset;
dc->props = exynos4210_uart_properties;
dc->vmsd = &vmstate_exynos4210_uart;
@@ -670,6 +673,7 @@ static const TypeInfo exynos4210_uart_info = {
.name = TYPE_EXYNOS4210_UART,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(Exynos4210UartState),
+ .instance_init = exynos4210_uart_init,
.class_init = exynos4210_uart_class_init,
};
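
The conversion above follows the common QOM split: side-effect-free wiring (MMIO region, IRQ pin) moves into instance_init, while setup that touches an external backend (the chardev) moves into realize, which is allowed to fail. A hedged sketch of the shape; the MY_DEV names and handlers are hypothetical and QEMU headers are elided:

typedef struct MyDevState {
    SysBusDevice parent_obj;
    MemoryRegion iomem;
    qemu_irq irq;
    CharBackend chr;
} MyDevState;

static void my_dev_init(Object *obj)
{
    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
    MyDevState *s = MY_DEV(obj);

    /* No side effects beyond the object itself. */
    memory_region_init_io(&s->iomem, obj, &my_dev_ops, s, "my-dev", 0x100);
    sysbus_init_mmio(sbd, &s->iomem);
    sysbus_init_irq(sbd, &s->irq);
}

static void my_dev_realize(DeviceState *dev, Error **errp)
{
    MyDevState *s = MY_DEV(dev);

    /* Backend hookup happens once the device is actually realized. */
    qemu_chr_fe_set_handlers(&s->chr, my_dev_can_rx, my_dev_rx,
                             my_dev_event, s, NULL, true);
}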
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index 7975c2cda1..d544cd91c0 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -732,6 +732,7 @@ static void virtio_serial_post_load_timer_cb(void *opaque)
static int fetch_active_ports_list(QEMUFile *f,
VirtIOSerial *s, uint32_t nr_active_ports)
{
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
uint32_t i;
s->post_load = g_malloc0(sizeof(*s->post_load));
@@ -765,7 +766,7 @@ static int fetch_active_ports_list(QEMUFile *f,
qemu_get_be64s(f, &port->iov_offset);
port->elem =
- qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
+ qemu_get_virtqueue_element(vdev, f, sizeof(VirtQueueElement));
/*
* Port was throttled on source machine. Let's
diff --git a/hw/display/ssd0303.c b/hw/display/ssd0303.c
index d3017563f3..68a80b9d64 100644
--- a/hw/display/ssd0303.c
+++ b/hw/display/ssd0303.c
@@ -179,7 +179,7 @@ static int ssd0303_send(I2CSlave *i2c, uint8_t data)
return 0;
}
-static void ssd0303_event(I2CSlave *i2c, enum i2c_event event)
+static int ssd0303_event(I2CSlave *i2c, enum i2c_event event)
{
ssd0303_state *s = SSD0303(i2c);
@@ -193,6 +193,8 @@ static void ssd0303_event(I2CSlave *i2c, enum i2c_event event)
/* Nothing to do. */
break;
}
+
+ return 0;
}
static void ssd0303_update_display(void *opaque)
diff --git a/hw/gpio/max7310.c b/hw/gpio/max7310.c
index 1bd5eaf911..f82e3e6555 100644
--- a/hw/gpio/max7310.c
+++ b/hw/gpio/max7310.c
@@ -129,7 +129,7 @@ static int max7310_tx(I2CSlave *i2c, uint8_t data)
return 0;
}
-static void max7310_event(I2CSlave *i2c, enum i2c_event event)
+static int max7310_event(I2CSlave *i2c, enum i2c_event event)
{
MAX7310State *s = MAX7310(i2c);
s->len = 0;
@@ -147,6 +147,8 @@ static void max7310_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
static const VMStateDescription vmstate_max7310 = {
diff --git a/hw/i2c/core.c b/hw/i2c/core.c
index e40781ea3b..2c1234cdff 100644
--- a/hw/i2c/core.c
+++ b/hw/i2c/core.c
@@ -88,18 +88,26 @@ int i2c_bus_busy(I2CBus *bus)
return !QLIST_EMPTY(&bus->current_devs);
}
+/* TODO: Make this handle multiple masters. */
/*
- * Returns non-zero if the address is not valid. If this is called
- * again without an intervening i2c_end_transfer(), like in the SMBus
- * case where the operation is switched from write to read, this
- * function will not rescan the bus and thus cannot fail.
+ * Start or continue an i2c transaction. When this is called for the
+ * first time or after an i2c_end_transfer(), if it returns an error
+ * the bus transaction is terminated (or really never started). If
+ * this is called after another i2c_start_transfer() without an
+ * intervening i2c_end_transfer(), and it returns an error, the
+ * transaction will not be terminated. The caller must do it.
+ *
+ * This corresponds with the way real hardware works. The SMBus
+ * protocol uses a start transfer to switch from write to read mode
+ * without releasing the bus. If that fails, the bus is still
+ * in a transaction.
*/
-/* TODO: Make this handle multiple masters. */
int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv)
{
BusChild *kid;
I2CSlaveClass *sc;
I2CNode *node;
+ bool bus_scanned = false;
if (address == I2C_BROADCAST) {
/*
@@ -130,6 +138,7 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv)
}
}
}
+ bus_scanned = true;
}
if (QLIST_EMPTY(&bus->current_devs)) {
@@ -137,11 +146,21 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv)
}
QLIST_FOREACH(node, &bus->current_devs, next) {
+ int rv;
+
sc = I2C_SLAVE_GET_CLASS(node->elt);
/* If the bus is already busy, assume this is a repeated
start condition. */
+
if (sc->event) {
- sc->event(node->elt, recv ? I2C_START_RECV : I2C_START_SEND);
+ rv = sc->event(node->elt, recv ? I2C_START_RECV : I2C_START_SEND);
+ if (rv && !bus->broadcast) {
+ if (bus_scanned) {
+ /* First call, terminate the transfer. */
+ i2c_end_transfer(bus);
+ }
+ return rv;
+ }
}
}
return 0;
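
The contract documented above has a concrete consequence for masters: a failed first start needs no cleanup, but a failed repeated start leaves the bus mid-transaction and the caller must end it. A hedged master-side sketch, mirroring the smbus_read_byte() fix later in this patch:

static int read_one_byte(I2CBus *bus, uint8_t addr, uint8_t command)
{
    int data;

    if (i2c_start_transfer(bus, addr, 0)) {     /* first start: write */
        return -1;                              /* nothing to clean up */
    }
    i2c_send(bus, command);
    if (i2c_start_transfer(bus, addr, 1)) {     /* repeated start: read */
        i2c_end_transfer(bus);                  /* caller must terminate */
        return -1;
    }
    data = i2c_recv(bus);
    i2c_nack(bus);
    i2c_end_transfer(bus);
    return data;
}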
diff --git a/hw/i2c/i2c-ddc.c b/hw/i2c/i2c-ddc.c
index 1227212934..66899d7233 100644
--- a/hw/i2c/i2c-ddc.c
+++ b/hw/i2c/i2c-ddc.c
@@ -230,13 +230,15 @@ static void i2c_ddc_reset(DeviceState *ds)
s->reg = 0;
}
-static void i2c_ddc_event(I2CSlave *i2c, enum i2c_event event)
+static int i2c_ddc_event(I2CSlave *i2c, enum i2c_event event)
{
I2CDDCState *s = I2CDDC(i2c);
if (event == I2C_START_SEND) {
s->firstbyte = true;
}
+
+ return 0;
}
static int i2c_ddc_rx(I2CSlave *i2c)
diff --git a/hw/i2c/smbus.c b/hw/i2c/smbus.c
index 5b4dd3eba4..2d1b79a689 100644
--- a/hw/i2c/smbus.c
+++ b/hw/i2c/smbus.c
@@ -67,7 +67,7 @@ static void smbus_do_write(SMBusDevice *dev)
}
}
-static void smbus_i2c_event(I2CSlave *s, enum i2c_event event)
+static int smbus_i2c_event(I2CSlave *s, enum i2c_event event)
{
SMBusDevice *dev = SMBUS_DEVICE(s);
@@ -148,6 +148,8 @@ static void smbus_i2c_event(I2CSlave *s, enum i2c_event event)
break;
}
}
+
+ return 0;
}
static int smbus_i2c_recv(I2CSlave *s)
@@ -249,7 +251,8 @@ int smbus_read_byte(I2CBus *bus, uint8_t addr, uint8_t command)
}
i2c_send(bus, command);
if (i2c_start_transfer(bus, addr, 1)) {
- assert(0);
+ i2c_end_transfer(bus);
+ return -1;
}
data = i2c_recv(bus);
i2c_nack(bus);
@@ -276,7 +279,8 @@ int smbus_read_word(I2CBus *bus, uint8_t addr, uint8_t command)
}
i2c_send(bus, command);
if (i2c_start_transfer(bus, addr, 1)) {
- assert(0);
+ i2c_end_transfer(bus);
+ return -1;
}
data = i2c_recv(bus);
data |= i2c_recv(bus) << 8;
@@ -307,7 +311,8 @@ int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data)
}
i2c_send(bus, command);
if (i2c_start_transfer(bus, addr, 1)) {
- assert(0);
+ i2c_end_transfer(bus);
+ return -1;
}
len = i2c_recv(bus);
if (len > 32) {
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 42ecf619d5..0c8912fd86 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -101,8 +101,6 @@ typedef struct AcpiPmInfo {
uint32_t gpe0_blk_len;
uint32_t io_base;
uint16_t cpu_hp_io_base;
- uint16_t mem_hp_io_base;
- uint16_t mem_hp_io_len;
uint16_t pcihp_io_base;
uint16_t pcihp_io_len;
} AcpiPmInfo;
@@ -148,9 +146,6 @@ static void acpi_get_pm_info(AcpiPmInfo *pm)
}
assert(obj);
- pm->mem_hp_io_base = ACPI_MEMORY_HOTPLUG_BASE;
- pm->mem_hp_io_len = ACPI_MEMORY_HOTPLUG_IO_LEN;
-
/* Fill in optional s3/s4 related properties */
o = object_property_get_qobject(obj, ACPI_PM_PROP_S3_DISABLED, NULL);
if (o) {
@@ -1038,130 +1033,6 @@ static Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set)
return crs;
}
-static void build_memory_devices(Aml *sb_scope, int nr_mem,
- uint16_t io_base, uint16_t io_len)
-{
- int i;
- Aml *scope;
- Aml *crs;
- Aml *field;
- Aml *dev;
- Aml *method;
- Aml *ifctx;
-
- /* build memory devices */
- assert(nr_mem <= ACPI_MAX_RAM_SLOTS);
- scope = aml_scope("\\_SB.PCI0." MEMORY_HOTPLUG_DEVICE);
- aml_append(scope,
- aml_name_decl(MEMORY_SLOTS_NUMBER, aml_int(nr_mem))
- );
-
- crs = aml_resource_template();
- aml_append(crs,
- aml_io(AML_DECODE16, io_base, io_base, 0, io_len)
- );
- aml_append(scope, aml_name_decl("_CRS", crs));
-
- aml_append(scope, aml_operation_region(
- MEMORY_HOTPLUG_IO_REGION, AML_SYSTEM_IO,
- aml_int(io_base), io_len)
- );
-
- field = aml_field(MEMORY_HOTPLUG_IO_REGION, AML_DWORD_ACC,
- AML_NOLOCK, AML_PRESERVE);
- aml_append(field, /* read only */
- aml_named_field(MEMORY_SLOT_ADDR_LOW, 32));
- aml_append(field, /* read only */
- aml_named_field(MEMORY_SLOT_ADDR_HIGH, 32));
- aml_append(field, /* read only */
- aml_named_field(MEMORY_SLOT_SIZE_LOW, 32));
- aml_append(field, /* read only */
- aml_named_field(MEMORY_SLOT_SIZE_HIGH, 32));
- aml_append(field, /* read only */
- aml_named_field(MEMORY_SLOT_PROXIMITY, 32));
- aml_append(scope, field);
-
- field = aml_field(MEMORY_HOTPLUG_IO_REGION, AML_BYTE_ACC,
- AML_NOLOCK, AML_WRITE_AS_ZEROS);
- aml_append(field, aml_reserved_field(160 /* bits, Offset(20) */));
- aml_append(field, /* 1 if enabled, read only */
- aml_named_field(MEMORY_SLOT_ENABLED, 1));
- aml_append(field,
- /*(read) 1 if has a insert event. (write) 1 to clear event */
- aml_named_field(MEMORY_SLOT_INSERT_EVENT, 1));
- aml_append(field,
- /* (read) 1 if has a remove event. (write) 1 to clear event */
- aml_named_field(MEMORY_SLOT_REMOVE_EVENT, 1));
- aml_append(field,
- /* initiates device eject, write only */
- aml_named_field(MEMORY_SLOT_EJECT, 1));
- aml_append(scope, field);
-
- field = aml_field(MEMORY_HOTPLUG_IO_REGION, AML_DWORD_ACC,
- AML_NOLOCK, AML_PRESERVE);
- aml_append(field, /* DIMM selector, write only */
- aml_named_field(MEMORY_SLOT_SLECTOR, 32));
- aml_append(field, /* _OST event code, write only */
- aml_named_field(MEMORY_SLOT_OST_EVENT, 32));
- aml_append(field, /* _OST status code, write only */
- aml_named_field(MEMORY_SLOT_OST_STATUS, 32));
- aml_append(scope, field);
- aml_append(sb_scope, scope);
-
- for (i = 0; i < nr_mem; i++) {
- #define BASEPATH "\\_SB.PCI0." MEMORY_HOTPLUG_DEVICE "."
- const char *s;
-
- dev = aml_device("MP%02X", i);
- aml_append(dev, aml_name_decl("_UID", aml_string("0x%02X", i)));
- aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C80")));
-
- method = aml_method("_CRS", 0, AML_NOTSERIALIZED);
- s = BASEPATH MEMORY_SLOT_CRS_METHOD;
- aml_append(method, aml_return(aml_call1(s, aml_name("_UID"))));
- aml_append(dev, method);
-
- method = aml_method("_STA", 0, AML_NOTSERIALIZED);
- s = BASEPATH MEMORY_SLOT_STATUS_METHOD;
- aml_append(method, aml_return(aml_call1(s, aml_name("_UID"))));
- aml_append(dev, method);
-
- method = aml_method("_PXM", 0, AML_NOTSERIALIZED);
- s = BASEPATH MEMORY_SLOT_PROXIMITY_METHOD;
- aml_append(method, aml_return(aml_call1(s, aml_name("_UID"))));
- aml_append(dev, method);
-
- method = aml_method("_OST", 3, AML_NOTSERIALIZED);
- s = BASEPATH MEMORY_SLOT_OST_METHOD;
-
- aml_append(method, aml_return(aml_call4(
- s, aml_name("_UID"), aml_arg(0), aml_arg(1), aml_arg(2)
- )));
- aml_append(dev, method);
-
- method = aml_method("_EJ0", 1, AML_NOTSERIALIZED);
- s = BASEPATH MEMORY_SLOT_EJECT_METHOD;
- aml_append(method, aml_return(aml_call2(
- s, aml_name("_UID"), aml_arg(0))));
- aml_append(dev, method);
-
- aml_append(sb_scope, dev);
- }
-
- /* build Method(MEMORY_SLOT_NOTIFY_METHOD, 2) {
- * If (LEqual(Arg0, 0x00)) {Notify(MP00, Arg1)} ... }
- */
- method = aml_method(MEMORY_SLOT_NOTIFY_METHOD, 2, AML_NOTSERIALIZED);
- for (i = 0; i < nr_mem; i++) {
- ifctx = aml_if(aml_equal(aml_arg(0), aml_int(i)));
- aml_append(ifctx,
- aml_notify(aml_name("MP%.02X", i), aml_arg(1))
- );
- aml_append(method, ifctx);
- }
- aml_append(sb_scope, method);
-}
-
static void build_hpet_aml(Aml *table)
{
Aml *crs;
@@ -2049,8 +1920,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
build_cpus_aml(dsdt, machine, opts, pm->cpu_hp_io_base,
"\\_SB.PCI0", "\\_GPE._E02");
}
- build_memory_hotplug_aml(dsdt, nr_mem, pm->mem_hp_io_base,
- pm->mem_hp_io_len);
+ build_memory_hotplug_aml(dsdt, nr_mem, "\\_SB.PCI0", "\\_GPE._E03");
scope = aml_scope("_GPE");
{
@@ -2065,10 +1935,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(scope, method);
}
- method = aml_method("_E03", 0, AML_NOTSERIALIZED);
- aml_append(method, aml_call0(MEMORY_HOTPLUG_HANDLER_PATH));
- aml_append(scope, method);
-
if (pcms->acpi_nvdimm_state.is_enabled) {
method = aml_method("_E04", 0, AML_NOTSERIALIZED);
aml_append(method, aml_notify(aml_name("\\_SB.NVDR"),
@@ -2321,45 +2187,40 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
sb_scope = aml_scope("\\_SB");
{
- build_memory_devices(sb_scope, nr_mem, pm->mem_hp_io_base,
- pm->mem_hp_io_len);
+ Object *pci_host;
+ PCIBus *bus = NULL;
- {
- Object *pci_host;
- PCIBus *bus = NULL;
+ pci_host = acpi_get_i386_pci_host();
+ if (pci_host) {
+ bus = PCI_HOST_BRIDGE(pci_host)->bus;
+ }
- pci_host = acpi_get_i386_pci_host();
- if (pci_host) {
- bus = PCI_HOST_BRIDGE(pci_host)->bus;
+ if (bus) {
+ Aml *scope = aml_scope("PCI0");
+ /* Scan all PCI buses. Generate tables to support hotplug. */
+ build_append_pci_bus_devices(scope, bus, pm->pcihp_bridge_en);
+
+ if (misc->tpm_version != TPM_VERSION_UNSPEC) {
+ dev = aml_device("ISA.TPM");
+ aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C31")));
+ aml_append(dev, aml_name_decl("_STA", aml_int(0xF)));
+ crs = aml_resource_template();
+ aml_append(crs, aml_memory32_fixed(TPM_TIS_ADDR_BASE,
+ TPM_TIS_ADDR_SIZE, AML_READ_WRITE));
+ /*
+ FIXME: TPM_TIS_IRQ=5 conflicts with PNP0C0F irqs,
+ Rewrite to take IRQ from TPM device model and
+ fix default IRQ value there to use some unused IRQ
+ */
+ /* aml_append(crs, aml_irq_no_flags(TPM_TIS_IRQ)); */
+ aml_append(dev, aml_name_decl("_CRS", crs));
+ aml_append(scope, dev);
}
- if (bus) {
- Aml *scope = aml_scope("PCI0");
- /* Scan all PCI buses. Generate tables to support hotplug. */
- build_append_pci_bus_devices(scope, bus, pm->pcihp_bridge_en);
-
- if (misc->tpm_version != TPM_VERSION_UNSPEC) {
- dev = aml_device("ISA.TPM");
- aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C31")));
- aml_append(dev, aml_name_decl("_STA", aml_int(0xF)));
- crs = aml_resource_template();
- aml_append(crs, aml_memory32_fixed(TPM_TIS_ADDR_BASE,
- TPM_TIS_ADDR_SIZE, AML_READ_WRITE));
- /*
- FIXME: TPM_TIS_IRQ=5 conflicts with PNP0C0F irqs,
- Rewrite to take IRQ from TPM device model and
- fix default IRQ value there to use some unused IRQ
- */
- /* aml_append(crs, aml_irq_no_flags(TPM_TIS_IRQ)); */
- aml_append(dev, aml_name_decl("_CRS", crs));
- aml_append(scope, dev);
- }
-
- aml_append(sb_scope, scope);
- }
+ aml_append(sb_scope, scope);
}
- aml_append(dsdt, sb_scope);
}
+ aml_append(dsdt, sb_scope);
/* copy AML table into ACPI tables blob and patch header there */
g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len);
@@ -2575,6 +2436,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
AcpiTableDmar *dmar;
AcpiDmarHardwareUnit *drhd;
+ AcpiDmarRootPortATS *atsr;
uint8_t dmar_flags = 0;
X86IOMMUState *iommu = x86_iommu_get_default();
AcpiDmarDeviceScope *scope = NULL;
@@ -2608,6 +2470,14 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
scope->path[0].device = PCI_SLOT(Q35_PSEUDO_DEVFN_IOAPIC);
scope->path[0].function = PCI_FUNC(Q35_PSEUDO_DEVFN_IOAPIC);
+ if (iommu->dt_supported) {
+ atsr = acpi_data_push(table_data, sizeof(*atsr));
+ atsr->type = cpu_to_le16(ACPI_DMAR_TYPE_ATSR);
+ atsr->length = cpu_to_le16(sizeof(*atsr));
+ atsr->flags = ACPI_DMAR_ATSR_ALL_PORTS;
+ atsr->pci_segment = cpu_to_le16(0);
+ }
+
build_header(linker, table_data, (void *)(table_data->data + dmar_start),
"DMAR", table_data->len - dmar_start, 1, NULL, NULL);
}
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 47b79d9112..e0732ccaf1 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -562,7 +562,7 @@ static void amdvi_mmio_trace(hwaddr addr, unsigned size)
trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
} else {
index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
- trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
+ trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
}
}
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 884926e9e7..0d3dc6a9f2 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -49,8 +49,8 @@
#define AMDVI_CAPAB_INIT_TYPE (3 << 16)
/* No. of used MMIO registers */
-#define AMDVI_MMIO_REGS_HIGH 8
-#define AMDVI_MMIO_REGS_LOW 7
+#define AMDVI_MMIO_REGS_HIGH 7
+#define AMDVI_MMIO_REGS_LOW 8
/* MMIO registers */
#define AMDVI_MMIO_DEVICE_TABLE 0x0000
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 5f3e35123d..ec62239aba 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -738,11 +738,18 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
"context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
ce->hi, ce->lo);
return -VTD_FR_CONTEXT_ENTRY_INV;
- } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) {
- VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
- "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
- ce->hi, ce->lo);
- return -VTD_FR_CONTEXT_ENTRY_INV;
+ } else {
+ switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
+ case VTD_CONTEXT_TT_MULTI_LEVEL:
+ /* fall through */
+ case VTD_CONTEXT_TT_DEV_IOTLB:
+ break;
+ default:
+ VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
+ "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
+ ce->hi, ce->lo);
+ return -VTD_FR_CONTEXT_ENTRY_INV;
+ }
}
return 0;
}
@@ -1438,7 +1445,61 @@ static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
vtd_iec_notify_all(s, !inv_desc->iec.granularity,
inv_desc->iec.index,
inv_desc->iec.index_mask);
+ return true;
+}
+static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc)
+{
+ VTDAddressSpace *vtd_dev_as;
+ IOMMUTLBEntry entry;
+ struct VTDBus *vtd_bus;
+ hwaddr addr;
+ uint64_t sz;
+ uint16_t sid;
+ uint8_t devfn;
+ bool size;
+ uint8_t bus_num;
+
+ addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
+ sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
+ devfn = sid & 0xff;
+ bus_num = sid >> 8;
+ size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
+
+ if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
+ (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
+ VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
+ "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
+ inv_desc->hi, inv_desc->lo);
+ return false;
+ }
+
+ vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
+ if (!vtd_bus) {
+ goto done;
+ }
+
+ vtd_dev_as = vtd_bus->dev_as[devfn];
+ if (!vtd_dev_as) {
+ goto done;
+ }
+
+ if (size) {
+ sz = 1 << (ctz64(~(addr | (VTD_PAGE_MASK_4K - 1))) + 1);
+ addr &= ~(sz - 1);
+ } else {
+ sz = VTD_PAGE_SIZE;
+ }
+
+ entry.target_as = &vtd_dev_as->as;
+ entry.addr_mask = sz - 1;
+ entry.iova = addr;
+ entry.perm = IOMMU_NONE;
+ entry.translated_addr = 0;
+ memory_region_notify_iommu(entry.target_as->root, entry);
+
+done:
return true;
}
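
The size-encoded address above packs the invalidation span into its low-order bits. A worked example of the arithmetic, under the assumption that the VTD_PAGE_MASK_4K term reduces to the 4 KiB page-offset mask (0xfff):

/*
 * Illustrative decode:
 *
 *   addr          = 0x12347000      bits 12-14 set, bit 15 clear
 *   addr | 0xfff  = 0x12347fff      fill the page-offset bits
 *   ~(...)                          lowest set bit is now bit 15
 *   ctz64(...)    = 15
 *   sz            = 1 << (15 + 1)   = 64 KiB
 *   addr         &= ~(sz - 1)       = 0x12340000
 *
 * Three trailing 1 bits above the page offset thus invalidate a
 * 16-page span starting at the aligned-down base address.
 */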
@@ -1490,6 +1551,14 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
+ case VTD_INV_DESC_DEVICE:
+ VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64
+ " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+ if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
+ return false;
+ }
+ break;
+
default:
VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
"hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8,
@@ -1996,7 +2065,27 @@ static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu,
static const VMStateDescription vtd_vmstate = {
.name = "iommu-intel",
- .unmigratable = 1,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .priority = MIG_PRI_IOMMU,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(root, IntelIOMMUState),
+ VMSTATE_UINT64(intr_root, IntelIOMMUState),
+ VMSTATE_UINT64(iq, IntelIOMMUState),
+ VMSTATE_UINT32(intr_size, IntelIOMMUState),
+ VMSTATE_UINT16(iq_head, IntelIOMMUState),
+ VMSTATE_UINT16(iq_tail, IntelIOMMUState),
+ VMSTATE_UINT16(iq_size, IntelIOMMUState),
+ VMSTATE_UINT16(next_frcd_reg, IntelIOMMUState),
+ VMSTATE_UINT8_ARRAY(csr, IntelIOMMUState, DMAR_REG_SIZE),
+ VMSTATE_UINT8(iq_last_desc_type, IntelIOMMUState),
+ VMSTATE_BOOL(root_extended, IntelIOMMUState),
+ VMSTATE_BOOL(dmar_enabled, IntelIOMMUState),
+ VMSTATE_BOOL(qi_enabled, IntelIOMMUState),
+ VMSTATE_BOOL(intr_enabled, IntelIOMMUState),
+ VMSTATE_BOOL(intr_eime, IntelIOMMUState),
+ VMSTATE_END_OF_LIST()
+ }
};
static const MemoryRegionOps vtd_mem_ops = {
@@ -2324,19 +2413,22 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
uintptr_t key = (uintptr_t)bus;
VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
VTDAddressSpace *vtd_dev_as;
+ char name[128];
if (!vtd_bus) {
+ uintptr_t *new_key = g_malloc(sizeof(*new_key));
+ *new_key = (uintptr_t)bus;
/* No corresponding free() */
vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
X86_IOMMU_PCI_DEVFN_MAX);
vtd_bus->bus = bus;
- key = (uintptr_t)bus;
- g_hash_table_insert(s->vtd_as_by_busptr, &key, vtd_bus);
+ g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
}
vtd_dev_as = vtd_bus->dev_as[devfn];
if (!vtd_dev_as) {
+ snprintf(name, sizeof(name), "intel_iommu_devfn_%d", devfn);
vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
vtd_dev_as->bus = bus;
@@ -2351,7 +2443,7 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
memory_region_add_subregion(&vtd_dev_as->iommu, VTD_INTERRUPT_ADDR_FIRST,
&vtd_dev_as->iommu_ir);
address_space_init(&vtd_dev_as->as,
- &vtd_dev_as->iommu, "intel_iommu");
+ &vtd_dev_as->iommu, name);
}
return vtd_dev_as;
}
@@ -2392,6 +2484,10 @@ static void vtd_init(IntelIOMMUState *s)
assert(s->intr_eim != ON_OFF_AUTO_AUTO);
}
+ if (x86_iommu->dt_supported) {
+ s->ecap |= VTD_ECAP_DT;
+ }
+
vtd_reset_context_cache(s);
vtd_reset_iotlb(s);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 11abfa2233..356f188b73 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -183,6 +183,7 @@
/* (offset >> 4) << 8 */
#define VTD_ECAP_IRO (DMAR_IOTLB_REG_OFFSET << 4)
#define VTD_ECAP_QI (1ULL << 1)
+#define VTD_ECAP_DT (1ULL << 2)
/* Interrupt Remapping support */
#define VTD_ECAP_IR (1ULL << 3)
#define VTD_ECAP_EIM (1ULL << 4)
@@ -326,6 +327,7 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_TYPE 0xf
#define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */
#define VTD_INV_DESC_IOTLB 0x2
+#define VTD_INV_DESC_DEVICE 0x3
#define VTD_INV_DESC_IEC 0x4 /* Interrupt Entry Cache
Invalidate Descriptor */
#define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */
@@ -361,6 +363,13 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL
#define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL
+/* Mask for Device IOTLB Invalidate Descriptor */
+#define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
+#define VTD_INV_DESC_DEVICE_IOTLB_SIZE(val) ((val) & 0x1)
+#define VTD_INV_DESC_DEVICE_IOTLB_SID(val) (((val) >> 32) & 0xFFFFULL)
+#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
+#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
+
/* Information about page-selective IOTLB invalidate */
struct VTDIOTLBPageInvInfo {
uint16_t domain_id;
@@ -399,8 +408,8 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_CONTEXT_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
#define VTD_CONTEXT_ENTRY_TT (3ULL << 2) /* Translation Type */
#define VTD_CONTEXT_TT_MULTI_LEVEL 0
-#define VTD_CONTEXT_TT_DEV_IOTLB 1
-#define VTD_CONTEXT_TT_PASS_THROUGH 2
+#define VTD_CONTEXT_TT_DEV_IOTLB (1ULL << 2)
+#define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2)
/* Second Level Page Translation Pointer*/
#define VTD_CONTEXT_ENTRY_SLPTPTR (~0xfffULL)
#define VTD_CONTEXT_ENTRY_RSVD_LO (0xff0ULL | ~VTD_HAW_MASK)
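
The constant change above matters because TT is a two-bit field at bits 3:2 of the low context-entry word, and the new switch in vtd_dev_to_context_entry() compares against the masked word rather than a right-shifted field value. A short sketch of why the old values could never match:

uint64_t tt = ce_lo & VTD_CONTEXT_ENTRY_TT;    /* keeps bits 3:2 in place */

if (tt == VTD_CONTEXT_TT_DEV_IOTLB) {          /* 1ULL << 2, i.e. 0x4 */
    /* device-IOTLB translation type */
}
/* With the unshifted constants (1 and 2), tt could only ever be 0x0,
 * 0x4, 0x8 or 0xc, so those comparisons were dead code. */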
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index 2278af7c32..23dcd3f039 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -106,6 +106,18 @@ static void x86_iommu_intremap_prop_set(Object *o, bool value, Error **errp)
s->intr_supported = value;
}
+static bool x86_iommu_device_iotlb_prop_get(Object *o, Error **errp)
+{
+ X86IOMMUState *s = X86_IOMMU_DEVICE(o);
+ return s->dt_supported;
+}
+
+static void x86_iommu_device_iotlb_prop_set(Object *o, bool value, Error **errp)
+{
+ X86IOMMUState *s = X86_IOMMU_DEVICE(o);
+ s->dt_supported = value;
+}
+
static void x86_iommu_instance_init(Object *o)
{
X86IOMMUState *s = X86_IOMMU_DEVICE(o);
@@ -114,6 +126,11 @@ static void x86_iommu_instance_init(Object *o)
s->intr_supported = false;
object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get,
x86_iommu_intremap_prop_set, NULL);
+ s->dt_supported = false;
+ object_property_add_bool(o, "device-iotlb",
+ x86_iommu_device_iotlb_prop_get,
+ x86_iommu_device_iotlb_prop_set,
+ NULL);
}
static const TypeInfo x86_iommu_info = {
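
With the property registered on the base type, device IOTLB support can be toggled from the command line. A hedged example invocation (q35 machine assumed, other options elided):

    qemu-system-x86_64 -M q35 -device intel-iommu,intremap=on,device-iotlb=on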
diff --git a/hw/input/lm832x.c b/hw/input/lm832x.c
index 539682cac8..2340523da0 100644
--- a/hw/input/lm832x.c
+++ b/hw/input/lm832x.c
@@ -383,7 +383,7 @@ static void lm_kbd_write(LM823KbdState *s, int reg, int byte, uint8_t value)
}
}
-static void lm_i2c_event(I2CSlave *i2c, enum i2c_event event)
+static int lm_i2c_event(I2CSlave *i2c, enum i2c_event event)
{
LM823KbdState *s = LM8323(i2c);
@@ -397,6 +397,8 @@ static void lm_i2c_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
static int lm_i2c_rx(I2CSlave *i2c)
diff --git a/hw/misc/tmp105.c b/hw/misc/tmp105.c
index f5c2472b5b..04e83787d4 100644
--- a/hw/misc/tmp105.c
+++ b/hw/misc/tmp105.c
@@ -176,7 +176,7 @@ static int tmp105_tx(I2CSlave *i2c, uint8_t data)
return 0;
}
-static void tmp105_event(I2CSlave *i2c, enum i2c_event event)
+static int tmp105_event(I2CSlave *i2c, enum i2c_event event)
{
TMP105State *s = TMP105(i2c);
@@ -185,6 +185,7 @@ static void tmp105_event(I2CSlave *i2c, enum i2c_event event)
}
s->len = 0;
+ return 0;
}
static int tmp105_post_load(void *opaque, int version_id)
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index 4994e1ca00..77a4b3e5bf 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -472,7 +472,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp)
hw_error("Failed to initialize PM capability");
}
- if (pcie_aer_init(pci_dev, e1000e_aer_offset, PCI_ERR_SIZEOF) < 0) {
+ if (pcie_aer_init(pci_dev, PCI_ERR_VER, e1000e_aer_offset,
+ PCI_ERR_SIZEOF, NULL) < 0) {
hw_error("Failed to initialize AER capability");
}
diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c
index 54c01275d4..d0f93eebfc 100644
--- a/hw/net/fsl_etsec/rings.c
+++ b/hw/net/fsl_etsec/rings.c
@@ -358,25 +358,24 @@ void etsec_walk_tx_ring(eTSEC *etsec, int ring_nbr)
/* Save flags before BD update */
bd_flags = bd.flags;
- if (bd_flags & BD_TX_READY) {
- process_tx_bd(etsec, &bd);
-
- /* Write back BD after update */
- write_buffer_descriptor(etsec, bd_addr, &bd);
+ if (!(bd_flags & BD_TX_READY)) {
+ break;
}
+ process_tx_bd(etsec, &bd);
+ /* Write back BD after update */
+ write_buffer_descriptor(etsec, bd_addr, &bd);
+
/* Wrap or next BD */
if (bd_flags & BD_WRAP) {
bd_addr = ring_base;
} else {
bd_addr += sizeof(eTSEC_rxtx_bd);
}
+ } while (TRUE);
- } while (bd_addr != ring_base);
-
- bd_addr = ring_base;
-
- /* Save the Buffer Descriptor Pointers to current bd */
+    /* Save the Buffer Descriptor Pointers to the last bd that was not
+     * successfully closed */
etsec->regs[TBPTR0 + ring_nbr].value = bd_addr;
/* Set transmit halt THLTx */
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index f05e59c85f..671c7e48c6 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -1205,6 +1205,20 @@ static void rtl8139_reset_rxring(RTL8139State *s, uint32_t bufferSize)
s->RxBufAddr = 0;
}
+static void rtl8139_reset_phy(RTL8139State *s)
+{
+ s->BasicModeStatus = 0x7809;
+ s->BasicModeStatus |= 0x0020; /* autonegotiation completed */
+ /* preserve link state */
+ s->BasicModeStatus |= qemu_get_queue(s->nic)->link_down ? 0 : 0x04;
+
+ s->NWayAdvert = 0x05e1; /* all modes, full duplex */
+ s->NWayLPAR = 0x05e1; /* all modes, full duplex */
+ s->NWayExpansion = 0x0001; /* autonegotiation supported */
+
+ s->CSCR = CSCR_F_LINK_100 | CSCR_HEART_BIT | CSCR_LD;
+}
+
static void rtl8139_reset(DeviceState *d)
{
RTL8139State *s = RTL8139(d);
@@ -1256,25 +1270,14 @@ static void rtl8139_reset(DeviceState *d)
s->Config3 = 0x1; /* fast back-to-back compatible */
s->Config5 = 0x0;
- s->CSCR = CSCR_F_LINK_100 | CSCR_HEART_BIT | CSCR_LD;
-
s->CpCmd = 0x0; /* reset C+ mode */
s->cplus_enabled = 0;
-
// s->BasicModeCtrl = 0x3100; // 100Mbps, full duplex, autonegotiation
// s->BasicModeCtrl = 0x2100; // 100Mbps, full duplex
s->BasicModeCtrl = 0x1000; // autonegotiation
- s->BasicModeStatus = 0x7809;
- //s->BasicModeStatus |= 0x0040; /* UTP medium */
- s->BasicModeStatus |= 0x0020; /* autonegotiation completed */
- /* preserve link state */
- s->BasicModeStatus |= qemu_get_queue(s->nic)->link_down ? 0 : 0x04;
-
- s->NWayAdvert = 0x05e1; /* all modes, full duplex */
- s->NWayLPAR = 0x05e1; /* all modes, full duplex */
- s->NWayExpansion = 0x0001; /* autonegotiation supported */
+ rtl8139_reset_phy(s);
/* also reset timer and disable timer interrupt */
s->TCTR = 0;
@@ -1469,7 +1472,7 @@ static void rtl8139_BasicModeCtrl_write(RTL8139State *s, uint32_t val)
DPRINTF("BasicModeCtrl register write(w) val=0x%04x\n", val);
/* mask unwritable bits */
- uint32_t mask = 0x4cff;
+ uint32_t mask = 0xccff;
if (1 || !rtl8139_config_writable(s))
{
@@ -1479,6 +1482,11 @@ static void rtl8139_BasicModeCtrl_write(RTL8139State *s, uint32_t val)
mask |= 0x0100;
}
+ if (val & 0x8000) {
+ /* Reset PHY */
+ rtl8139_reset_phy(s);
+ }
+
val = SET_MASKED(val, mask, s->BasicModeCtrl);
s->BasicModeCtrl = val;
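
One plausible reading of the mask change above (an interpretation, not stated by the patch): BMCR bit 15 is the self-clearing PHY-reset bit, so widening the unwritable mask from 0x4cff to 0xccff lets the write trigger rtl8139_reset_phy() as a side effect while the bit itself is never stored.

/* Guest-visible sequence (illustrative):
 *   write BMCR = 0x8000   -> rtl8139_reset_phy() runs
 *   read  BMCR            -> bit 15 already 0 (masked, never stored),
 *                            matching the self-clearing real hardware
 */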
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index f2d49ad7e7..6280422d02 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -51,6 +51,7 @@ static const int kernel_feature_bits[] = {
VIRTIO_RING_F_EVENT_IDX,
VIRTIO_NET_F_MRG_RXBUF,
VIRTIO_F_VERSION_1,
+ VIRTIO_NET_F_MTU,
VHOST_INVALID_FEATURE_BIT
};
@@ -74,6 +75,7 @@ static const int user_feature_bits[] = {
VIRTIO_NET_F_HOST_ECN,
VIRTIO_NET_F_HOST_UFO,
VIRTIO_NET_F_MRG_RXBUF,
+ VIRTIO_NET_F_MTU,
/* This bit implies RARP isn't sent by QEMU out of band */
VIRTIO_NET_F_GUEST_ANNOUNCE,
@@ -435,6 +437,17 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)
return 0;
}
+int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
+{
+ const VhostOps *vhost_ops = net->dev.vhost_ops;
+
+ if (!vhost_ops->vhost_net_set_mtu) {
+ return 0;
+ }
+
+ return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
+}
+
#else
uint64_t vhost_net_get_max_queues(VHostNetState *net)
{
@@ -501,4 +514,9 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)
{
return 0;
}
+
+int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
+{
+ return 0;
+}
#endif
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 5009533cfa..7b3ad4a9f0 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -55,6 +55,8 @@ static VirtIOFeature feature_sizes[] = {
.end = endof(struct virtio_net_config, status)},
{.flags = 1 << VIRTIO_NET_F_MQ,
.end = endof(struct virtio_net_config, max_virtqueue_pairs)},
+ {.flags = 1 << VIRTIO_NET_F_MTU,
+ .end = endof(struct virtio_net_config, mtu)},
{}
};
@@ -81,6 +83,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
virtio_stw_p(vdev, &netcfg.status, n->status);
virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
+ virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
memcpy(netcfg.mac, n->mac, ETH_ALEN);
memcpy(config, &netcfg, n->config_size);
}
@@ -152,6 +155,16 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
}
+ if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
+ r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
+ if (r < 0) {
+ error_report("%uBytes MTU not supported by the backend",
+ n->net_conf.mtu);
+
+ return;
+ }
+ }
+
n->vhost_started = 1;
r = vhost_net_start(vdev, n->nic->ncs, queues);
if (r < 0) {
@@ -218,6 +231,14 @@ static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
}
}
+static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
+{
+ unsigned int dropped = virtqueue_drop_all(vq);
+ if (dropped) {
+ virtio_notify(vdev, vq);
+ }
+}
+
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
VirtIONet *n = VIRTIO_NET(vdev);
@@ -262,6 +283,14 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
} else {
qemu_bh_cancel(q->tx_bh);
}
+ if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
+ (queue_status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+            /* if tx was waiting we likely have packets queued in the tx
+             * queue with notification disabled */
+ q->tx_waiting = 0;
+ virtio_queue_set_notification(q->tx_vq, 1);
+ virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
+ }
}
}
}
@@ -1323,6 +1352,11 @@ static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
VirtIONet *n = VIRTIO_NET(vdev);
VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
+ if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
+ virtio_net_drop_tx_queue_data(vdev, vq);
+ return;
+ }
+
/* This happens when device was stopped but VCPU wasn't. */
if (!vdev->vm_running) {
q->tx_waiting = 1;
@@ -1349,6 +1383,11 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
VirtIONet *n = VIRTIO_NET(vdev);
VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
+ if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
+ virtio_net_drop_tx_queue_data(vdev, vq);
+ return;
+ }
+
if (unlikely(q->tx_waiting)) {
return;
}
@@ -1695,6 +1734,7 @@ static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
int i, config_size = 0;
virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
+
for (i = 0; feature_sizes[i].flags != 0; i++) {
if (host_features & feature_sizes[i].flags) {
config_size = MAX(feature_sizes[i].end, config_size);
@@ -1724,6 +1764,10 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
NetClientState *nc;
int i;
+ if (n->net_conf.mtu) {
+ n->host_features |= (0x1 << VIRTIO_NET_F_MTU);
+ }
+
virtio_net_set_config_size(n, n->host_features);
virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
@@ -1922,6 +1966,7 @@ static Property virtio_net_properties[] = {
DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
+ DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
DEFINE_PROP_END_OF_LIST(),
};
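
The new host_mtu property ties the pieces above together: a non-zero value advertises VIRTIO_NET_F_MTU, fills the mtu config-space field, and is pushed to a vhost backend when the device starts. A hedged usage example (netdev setup elided):

    -device virtio-net-pci,netdev=net0,host_mtu=9000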
diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index c8b5ac4207..84b7946c31 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -135,8 +135,10 @@ static int ioh3420_initfn(PCIDevice *d)
goto err_pcie_cap;
}
- rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
+ rc = pcie_aer_init(d, PCI_ERR_VER, IOH_EP_AER_OFFSET,
+ PCI_ERR_SIZEOF, &err);
if (rc < 0) {
+ error_report_err(err);
goto err;
}
pcie_aer_root_init(d);
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index cef6e1325e..04b8e5b847 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -97,8 +97,10 @@ static int xio3130_downstream_initfn(PCIDevice *d)
goto err_pcie_cap;
}
- rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
+ rc = pcie_aer_init(d, PCI_ERR_VER, XIO3130_AER_OFFSET,
+ PCI_ERR_SIZEOF, &err);
if (rc < 0) {
+ error_report_err(err);
goto err;
}
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index 4ad0440aa1..d1f59c8834 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -85,8 +85,10 @@ static int xio3130_upstream_initfn(PCIDevice *d)
pcie_cap_flr_init(d);
pcie_cap_deverr_init(d);
- rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
+ rc = pcie_aer_init(d, PCI_ERR_VER, XIO3130_AER_OFFSET,
+ PCI_ERR_SIZEOF, &err);
if (rc < 0) {
+ error_report_err(err);
goto err;
}
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 24fae1689d..637d54549e 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -982,8 +982,8 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
pci_get_function_0(pci_dev)) {
error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
" new func %s cannot be exposed to guest.",
- PCI_SLOT(devfn),
- bus->devices[PCI_DEVFN(PCI_SLOT(devfn), 0)]->name,
+ PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
+ pci_get_function_0(pci_dev)->name,
name);
return NULL;
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 99cfb4561b..adeda04036 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -717,3 +717,18 @@ void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num)
PCI_EXT_CAP_DSN_SIZEOF);
pci_set_quad(dev->config + offset + pci_dsn_cap, ser_num);
}
+
+void pcie_ats_init(PCIDevice *dev, uint16_t offset)
+{
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_ATS, 0x1,
+ offset, PCI_EXT_CAP_ATS_SIZEOF);
+
+ dev->exp.ats_cap = offset;
+
+ /* Invalidate Queue Depth 0, Page Aligned Request 0 */
+ pci_set_word(dev->config + offset + PCI_ATS_CAP, 0);
+ /* STU 0, Disabled by default */
+ pci_set_word(dev->config + offset + PCI_ATS_CTRL, 0);
+
+ pci_set_word(dev->wmask + dev->exp.ats_cap + PCI_ATS_CTRL, 0x800f);
+}
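
A hedged sketch of how an express endpoint might advertise the new capability; the function name and the 0x100 offset are illustrative, and the offset must sit in extended config space without colliding with other extended capabilities:

static void my_ep_init_caps(PCIDevice *d)
{
    /* ATS cap version 1, STU 0, disabled until the guest enables it */
    pcie_ats_init(d, 0x100);
}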
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 048ce6a424..daf1f65427 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -29,6 +29,7 @@
#include "hw/pci/msi.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pcie_regs.h"
+#include "qapi/error.h"
//#define DEBUG_PCIE
#ifdef DEBUG_PCIE
@@ -96,21 +97,17 @@ static void aer_log_clear_all_err(PCIEAERLog *aer_log)
aer_log->log_num = 0;
}
-int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
+int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
+ uint16_t size, Error **errp)
{
- PCIExpressDevice *exp;
-
- pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, cap_ver,
offset, size);
- exp = &dev->exp;
- exp->aer_cap = offset;
+ dev->exp.aer_cap = offset;
- /* log_max is property */
- if (dev->exp.aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) {
- dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
- }
- /* clip down the value to avoid unreasobale memory usage */
+ /* clip down the value to avoid unreasonable memory usage */
if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
+ error_setg(errp, "Invalid aer_log_max %d. The max number of aer log "
+ "is %d", dev->exp.aer_log.log_max, PCIE_AER_LOG_MAX_LIMIT);
return -EINVAL;
}
dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index f5c1d98192..07650683f7 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1098,7 +1098,7 @@ static int virtio_ccw_set_guest_notifier(VirtioCcwDevice *dev, int n,
* We do not support individual masking for channel devices, so we
* need to manually trigger any guest masking callbacks here.
*/
- if (k->guest_notifier_mask) {
+ if (k->guest_notifier_mask && vdev->use_guest_notifier_mask) {
k->guest_notifier_mask(vdev, n, false);
}
/* get lost events and re-inject */
@@ -1107,7 +1107,7 @@ static int virtio_ccw_set_guest_notifier(VirtioCcwDevice *dev, int n,
event_notifier_set(notifier);
}
} else {
- if (k->guest_notifier_mask) {
+ if (k->guest_notifier_mask && vdev->use_guest_notifier_mask) {
k->guest_notifier_mask(vdev, n, true);
}
if (with_irqfd) {
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 204e14f237..ce19efffc8 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -198,12 +198,14 @@ static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq)
SCSIBus *bus = sreq->bus;
VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
VirtIOSCSIReq *req;
uint32_t n;
qemu_get_be32s(f, &n);
assert(n < vs->conf.num_queues);
- req = qemu_get_virtqueue_element(f, sizeof(VirtIOSCSIReq) + vs->cdb_size);
+ req = qemu_get_virtqueue_element(vdev, f,
+ sizeof(VirtIOSCSIReq) + vs->cdb_size);
virtio_scsi_init_req(s, vs->cmd_vqs[n], req);
if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size,
diff --git a/hw/ssi/imx_spi.c b/hw/ssi/imx_spi.c
index e4e395fa67..b66505ca49 100644
--- a/hw/ssi/imx_spi.c
+++ b/hw/ssi/imx_spi.c
@@ -320,9 +320,6 @@ static void imx_spi_write(void *opaque, hwaddr offset, uint64_t value,
TYPE_IMX_SPI, __func__);
break;
case ECSPI_TXDATA:
- case ECSPI_MSGDATA:
- /* Is there any difference between TXDATA and MSGDATA ? */
- /* I'll have to look in the linux driver */
if (!imx_spi_is_enabled(s)) {
/* Ignore writes if device is disabled */
break;
@@ -380,6 +377,14 @@ static void imx_spi_write(void *opaque, hwaddr offset, uint64_t value,
}
break;
+ case ECSPI_MSGDATA:
+    /* It is not clear from the spec what MSGDATA is for, */
+    /* and it is not used by the Linux driver, */
+    /* so for now we just ignore it. */
+ qemu_log_mask(LOG_UNIMP,
+ "[%s]%s: Trying to write to MSGDATA, ignoring\n",
+ TYPE_IMX_SPI, __func__);
+ break;
default:
s->regs[index] = value;
diff --git a/hw/timer/ds1338.c b/hw/timer/ds1338.c
index f5d04dd5d7..3849b74a68 100644
--- a/hw/timer/ds1338.c
+++ b/hw/timer/ds1338.c
@@ -94,7 +94,7 @@ static void inc_regptr(DS1338State *s)
}
}
-static void ds1338_event(I2CSlave *i2c, enum i2c_event event)
+static int ds1338_event(I2CSlave *i2c, enum i2c_event event)
{
DS1338State *s = DS1338(i2c);
@@ -113,6 +113,8 @@ static void ds1338_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
static int ds1338_recv(I2CSlave *i2c)
diff --git a/hw/timer/twl92230.c b/hw/timer/twl92230.c
index 7ba4e9a7c9..b8d914e49b 100644
--- a/hw/timer/twl92230.c
+++ b/hw/timer/twl92230.c
@@ -713,12 +713,14 @@ static void menelaus_write(void *opaque, uint8_t addr, uint8_t value)
}
}
-static void menelaus_event(I2CSlave *i2c, enum i2c_event event)
+static int menelaus_event(I2CSlave *i2c, enum i2c_event event)
{
MenelausState *s = TWL92230(i2c);
if (event == I2C_START_SEND)
s->firstbyte = 1;
+
+ return 0;
}
static int menelaus_tx(I2CSlave *i2c, uint8_t data)
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 7b6f55e70e..6926eedd3f 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -15,6 +15,8 @@ virtio_rng_pushed(void *rng, size_t len) "rng %p: %zd bytes pushed"
virtio_rng_request(void *rng, size_t size, unsigned quota) "rng %p: %zd bytes requested, %u bytes quota left"
# hw/virtio/virtio-balloon.c
+virtio_balloon_bad_addr(uint64_t gpa) "%"PRIx64
virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s gpa: %"PRIx64
virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d"
virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d"
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 7ee92b32c5..9334a8ae22 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -32,6 +32,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
VHOST_USER_PROTOCOL_F_RARP = 2,
VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
+ VHOST_USER_PROTOCOL_F_NET_MTU = 4,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -59,6 +60,7 @@ typedef enum VhostUserRequest {
VHOST_USER_GET_QUEUE_NUM = 17,
VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_SEND_RARP = 19,
+ VHOST_USER_NET_SET_MTU = 20,
VHOST_USER_MAX
} VhostUserRequest;
@@ -186,6 +188,7 @@ static bool vhost_user_one_time_request(VhostUserRequest request)
case VHOST_USER_RESET_OWNER:
case VHOST_USER_SET_MEM_TABLE:
case VHOST_USER_GET_QUEUE_NUM:
+ case VHOST_USER_NET_SET_MTU:
return true;
default:
return false;
@@ -685,6 +688,36 @@ static bool vhost_user_can_merge(struct vhost_dev *dev,
return mfd == rfd;
}
+static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
+{
+ VhostUserMsg msg;
+ bool reply_supported = virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+ if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
+ return 0;
+ }
+
+ msg.request = VHOST_USER_NET_SET_MTU;
+ msg.payload.u64 = mtu;
+ msg.size = sizeof(msg.payload.u64);
+ msg.flags = VHOST_USER_VERSION;
+ if (reply_supported) {
+ msg.flags |= VHOST_USER_NEED_REPLY_MASK;
+ }
+
+ if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+ return -1;
+ }
+
+    /* If reply_ack is supported, the slave must ack that the MTU is valid */
+ if (reply_supported) {
+ return process_message_reply(dev, msg.request);
+ }
+
+ return 0;
+}
+
const VhostOps user_ops = {
.backend_type = VHOST_BACKEND_TYPE_USER,
.vhost_backend_init = vhost_user_init,
@@ -708,4 +741,5 @@ const VhostOps user_ops = {
.vhost_requires_shm_log = vhost_user_requires_shm_log,
.vhost_migration_done = vhost_user_migration_done,
.vhost_backend_can_merge = vhost_user_can_merge,
+ .vhost_net_set_mtu = vhost_user_net_set_mtu,
};
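The new op is reached through VhostOps; as a hedged illustration, a caller in hw/net/vhost_net.c could dispatch to it roughly as below. The wrapper name matches the declaration added to include/net/vhost_net.h later in this patch, but the body and the embedded dev field are assumptions, not code from this commit.

/* Sketch only: hand the MTU to the backend if it provides the op. */
int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    if (!vhost_ops->vhost_net_set_mtu) {
        return 0;   /* backend has no MTU op: nothing to do */
    }

    return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
}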
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index f7f70237db..d396b22531 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -993,6 +993,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
virtio_queue_set_last_avail_idx(vdev, idx, state.num);
}
virtio_queue_invalidate_signalled_used(vdev, idx);
+ virtio_queue_update_used_idx(vdev, idx);
/* In the cross-endian case, we need to reset the vring endianness to
* native as legacy devices expect so by default.
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 884570a57d..a705e0ec55 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -228,8 +228,13 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
/* FIXME: remove get_system_memory(), but how? */
section = memory_region_find(get_system_memory(), pa, 1);
- if (!int128_nz(section.size) || !memory_region_is_ram(section.mr))
+ if (!int128_nz(section.size) ||
+ !memory_region_is_ram(section.mr) ||
+ memory_region_is_rom(section.mr) ||
+ memory_region_is_romd(section.mr)) {
+ trace_virtio_balloon_bad_addr(pa);
continue;
+ }
trace_virtio_balloon_handle_output(memory_region_name(section.mr),
pa);
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index d6c0c72bd2..d31cc00e83 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -28,6 +28,7 @@
#include "hw/qdev.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio.h"
+#include "exec/address-spaces.h"
/* #define DEBUG_VIRTIO_BUS */
@@ -61,6 +62,13 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp)
if (klass->device_plugged != NULL) {
klass->device_plugged(qbus->parent, errp);
}
+
+ if (klass->get_dma_as != NULL &&
+ virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
+ vdev->dma_as = klass->get_dma_as(qbus->parent);
+ } else {
+ vdev->dma_as = &address_space_memory;
+ }
}
/* Reset the virtio_bus */
diff --git a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c
index a1b09064c0..422aca3a98 100644
--- a/hw/virtio/virtio-crypto-pci.c
+++ b/hw/virtio/virtio-crypto-pci.c
@@ -31,6 +31,11 @@ static void virtio_crypto_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
VirtIOCryptoPCI *vcrypto = VIRTIO_CRYPTO_PCI(vpci_dev);
DeviceState *vdev = DEVICE(&vcrypto->vdev);
+ if (vcrypto->vdev.conf.cryptodev == NULL) {
+ error_setg(errp, "'cryptodev' parameter expects a valid object");
+ return;
+ }
+
qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
virtio_pci_force_virtio_1(vpci_dev);
object_property_set_bool(OBJECT(vdev), true, "realized", errp);
@@ -48,7 +53,6 @@ static void virtio_crypto_pci_class_init(ObjectClass *klass, void *data)
k->realize = virtio_crypto_pci_realize;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
dc->props = virtio_crypto_pci_properties;
- dc->hotpluggable = false;
pcidev_k->class_id = PCI_CLASS_OTHERS;
}
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 2f2467e859..296472fc6e 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -337,7 +337,18 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req)
{
if (req) {
if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) {
- g_free(req->u.sym_op_info);
+ size_t max_len;
+ CryptoDevBackendSymOpInfo *op_info = req->u.sym_op_info;
+
+ max_len = op_info->iv_len +
+ op_info->aad_len +
+ op_info->src_len +
+ op_info->dst_len +
+ op_info->digest_result_len;
+
+ /* Zeroize and free request data structure */
+ memset(op_info, 0, sizeof(*op_info) + max_len);
+ g_free(op_info);
}
g_free(req);
}
@@ -355,7 +366,7 @@ virtio_crypto_sym_input_data_helper(VirtIODevice *vdev,
return;
}
- len = sym_op_info->dst_len;
+ len = sym_op_info->src_len;
/* Save the cipher result */
s = iov_from_buf(req->in_iov, req->in_num, 0, sym_op_info->dst, len);
if (s != len) {
@@ -416,7 +427,7 @@ virtio_crypto_sym_op_helper(VirtIODevice *vdev,
uint32_t hash_start_src_offset = 0, len_to_hash = 0;
uint32_t cipher_start_src_offset = 0, len_to_cipher = 0;
- size_t max_len, curr_size = 0;
+ uint64_t max_len, curr_size = 0;
size_t s;
/* Plain cipher */
@@ -441,7 +452,7 @@ virtio_crypto_sym_op_helper(VirtIODevice *vdev,
return NULL;
}
- max_len = iv_len + aad_len + src_len + dst_len + hash_result_len;
+ max_len = (uint64_t)iv_len + aad_len + src_len + dst_len + hash_result_len;
if (unlikely(max_len > vcrypto->conf.max_size)) {
virtio_error(vdev, "virtio-crypto too big length");
return NULL;
@@ -732,7 +743,7 @@ static void virtio_crypto_reset(VirtIODevice *vdev)
VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
/* multiqueue is disabled by default */
vcrypto->curr_queues = 1;
- if (!vcrypto->cryptodev->ready) {
+ if (!cryptodev_backend_is_ready(vcrypto->cryptodev)) {
vcrypto->status &= ~VIRTIO_CRYPTO_S_HW_READY;
} else {
vcrypto->status |= VIRTIO_CRYPTO_S_HW_READY;
@@ -792,13 +803,14 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
}
vcrypto->ctrl_vq = virtio_add_queue(vdev, 64, virtio_crypto_handle_ctrl);
- if (!vcrypto->cryptodev->ready) {
+ if (!cryptodev_backend_is_ready(vcrypto->cryptodev)) {
vcrypto->status &= ~VIRTIO_CRYPTO_S_HW_READY;
} else {
vcrypto->status |= VIRTIO_CRYPTO_S_HW_READY;
}
virtio_crypto_init_config(vdev);
+ cryptodev_backend_set_used(vcrypto->cryptodev, true);
}
static void virtio_crypto_device_unrealize(DeviceState *dev, Error **errp)
@@ -818,6 +830,7 @@ static void virtio_crypto_device_unrealize(DeviceState *dev, Error **errp)
g_free(vcrypto->vqs);
virtio_cleanup(vdev);
+ cryptodev_backend_set_used(vcrypto->cryptodev, false);
}
static const VMStateDescription vmstate_virtio_crypto = {
@@ -875,6 +888,20 @@ static void virtio_crypto_class_init(ObjectClass *klass, void *data)
vdc->reset = virtio_crypto_reset;
}
+static void
+virtio_crypto_check_cryptodev_is_used(Object *obj, const char *name,
+ Object *val, Error **errp)
+{
+ if (cryptodev_backend_is_used(CRYPTODEV_BACKEND(val))) {
+ char *path = object_get_canonical_path_component(val);
+        error_setg(errp,
+                   "cryptodev backend '%s' is already in use", path);
+ g_free(path);
+ } else {
+ qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
+ }
+}
+
static void virtio_crypto_instance_init(Object *obj)
{
VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(obj);
@@ -888,7 +915,7 @@ static void virtio_crypto_instance_init(Object *obj)
object_property_add_link(obj, "cryptodev",
TYPE_CRYPTODEV_BACKEND,
(Object **)&vcrypto->conf.cryptodev,
- qdev_prop_allow_set_link_before_realize,
+ virtio_crypto_check_cryptodev_is_used,
OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL);
}
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 17412cb7b5..60654dc19d 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -402,7 +402,7 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign,
event_notifier_cleanup(notifier);
}
- if (vdc->guest_notifier_mask) {
+ if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) {
vdc->guest_notifier_mask(vdev, n, !assign);
}
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 21c2b9dbfc..854b8f22bf 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1144,6 +1144,14 @@ static int virtio_pci_query_nvectors(DeviceState *d)
return proxy->nvectors;
}
+static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
+{
+ VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+ PCIDevice *dev = &proxy->pci_dev;
+
+ return pci_get_address_space(dev);
+}
+
static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
struct virtio_pci_cap *cap)
{
@@ -1601,6 +1609,11 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
}
if (legacy) {
+ if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
+ error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
+ "neither legacy nor transitional device.");
+ return ;
+ }
/* legacy and transitional */
pci_set_word(config + PCI_SUBSYSTEM_VENDOR_ID,
pci_get_word(config + PCI_VENDOR_ID));
@@ -1802,6 +1815,11 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
* PCI Power Management Interface Specification.
*/
pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);
+
+ if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
+ pcie_ats_init(pci_dev, 256);
+ }
+
} else {
/*
* make future invocations of pci_is_express() return false
@@ -1855,6 +1873,8 @@ static Property virtio_pci_properties[] = {
VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
ignore_backend_features, false),
+ DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags,
+ VIRTIO_PCI_FLAG_ATS_BIT, false),
DEFINE_PROP_END_OF_LIST(),
};
@@ -2520,6 +2540,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
k->query_nvectors = virtio_pci_query_nvectors;
k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
+ k->get_dma_as = virtio_pci_get_dma_as;
}
static const TypeInfo virtio_pci_bus_info = {
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index 5e078866c4..d00064cc0c 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -72,6 +72,7 @@ enum {
VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT,
VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT,
VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT,
+ VIRTIO_PCI_FLAG_ATS_BIT,
};
/* Need to activate work-arounds for buggy guests at vmstate load. */
@@ -96,6 +97,9 @@ enum {
#define VIRTIO_PCI_FLAG_PAGE_PER_VQ \
(1 << VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT)
+/* address space translation service */
+#define VIRTIO_PCI_FLAG_ATS (1 << VIRTIO_PCI_FLAG_ATS_BIT)
+
typedef struct {
MSIMessage msg;
int virq;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index d40711a31d..aa4f38f50a 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -23,6 +23,7 @@
#include "hw/virtio/virtio-bus.h"
#include "migration/migration.h"
#include "hw/virtio/virtio-access.h"
+#include "sysemu/dma.h"
/*
* The alignment to use between consumer and producer parts of vring.
@@ -92,7 +93,7 @@ struct VirtQueue
uint16_t queue_index;
- int inuse;
+ unsigned int inuse;
uint16_t vector;
VirtIOHandleOutput handle_output;
@@ -121,7 +122,7 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n)
static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
hwaddr desc_pa, int i)
{
- address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
+ address_space_read(vdev->dma_as, desc_pa + i * sizeof(VRingDesc),
MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
virtio_tswap64s(vdev, &desc->addr);
virtio_tswap32s(vdev, &desc->len);
@@ -163,7 +164,7 @@ static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
virtio_tswap32s(vq->vdev, &uelem->id);
virtio_tswap32s(vq->vdev, &uelem->len);
pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
- address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
+ address_space_write(vq->vdev->dma_as, pa, MEMTXATTRS_UNSPECIFIED,
(void *)uelem, sizeof(VRingUsedElem));
}
@@ -249,6 +250,7 @@ int virtio_queue_empty(VirtQueue *vq)
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
{
+ AddressSpace *dma_as = vq->vdev->dma_as;
unsigned int offset;
int i;
@@ -256,17 +258,18 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
for (i = 0; i < elem->in_num; i++) {
size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
- cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
- elem->in_sg[i].iov_len,
- 1, size);
+ dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
+ elem->in_sg[i].iov_len,
+ DMA_DIRECTION_FROM_DEVICE, size);
offset += size;
}
for (i = 0; i < elem->out_num; i++)
- cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
- elem->out_sg[i].iov_len,
- 0, elem->out_sg[i].iov_len);
+ dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
+ elem->out_sg[i].iov_len,
+ DMA_DIRECTION_TO_DEVICE,
+ elem->out_sg[i].iov_len);
}
/* virtqueue_detach_element:
@@ -560,7 +563,10 @@ static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
goto out;
}
- iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
+ iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
+ is_write ?
+ DMA_DIRECTION_FROM_DEVICE :
+ DMA_DIRECTION_TO_DEVICE);
if (!iov[num_sg].iov_base) {
virtio_error(vdev, "virtio: bogus descriptor or out of resources");
goto out;
@@ -597,9 +603,9 @@ static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
}
}
-static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
- unsigned int *num_sg, unsigned int max_size,
- int is_write)
+static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
+ hwaddr *addr, unsigned int *num_sg,
+ unsigned int max_size, int is_write)
{
unsigned int i;
hwaddr len;
@@ -618,7 +624,10 @@ static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
for (i = 0; i < *num_sg; i++) {
len = sg[i].iov_len;
- sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
+ sg[i].iov_base = dma_memory_map(vdev->dma_as,
+ addr[i], &len, is_write ?
+ DMA_DIRECTION_FROM_DEVICE :
+ DMA_DIRECTION_TO_DEVICE);
if (!sg[i].iov_base) {
error_report("virtio: error trying to map MMIO memory");
exit(1);
@@ -630,12 +639,15 @@ static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
}
}
-void virtqueue_map(VirtQueueElement *elem)
+void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
{
- virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
- VIRTQUEUE_MAX_SIZE, 1);
- virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
- VIRTQUEUE_MAX_SIZE, 0);
+ virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num,
+ MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)),
+ 1);
+ virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num,
+ MIN(ARRAY_SIZE(elem->out_sg),
+ ARRAY_SIZE(elem->out_addr)),
+ 0);
}
static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
@@ -771,6 +783,44 @@ err_undo_map:
return NULL;
}
+/* virtqueue_drop_all:
+ * @vq: The #VirtQueue
+ * Drops all queued buffers and indicates them to the guest
+ * as if they were done. Useful when buffers cannot be
+ * processed but must be returned to the guest.
+ */
+unsigned int virtqueue_drop_all(VirtQueue *vq)
+{
+ unsigned int dropped = 0;
+ VirtQueueElement elem = {};
+ VirtIODevice *vdev = vq->vdev;
+ bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
+
+ if (unlikely(vdev->broken)) {
+ return 0;
+ }
+
+ while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
+        /* works similarly to virtqueue_pop() but does not map buffers
+         * or allocate any memory */
+ smp_rmb();
+ if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
+ break;
+ }
+ vq->inuse++;
+ vq->last_avail_idx++;
+ if (fEventIdx) {
+ vring_set_avail_event(vq, vq->last_avail_idx);
+ }
+ /* immediately push the element, nothing to unmap
+ * as both in_num and out_num are set to 0 */
+ virtqueue_push(vq, &elem, 0);
+ dropped++;
+ }
+
+ return dropped;
+}
+
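The doc comment above states the intent; a hedged sketch of the expected call pattern follows. The helper name is illustrative and not part of this patch.

/* Sketch: return all pending buffers as used and signal the guest. */
static void mydev_purge_queue(VirtIODevice *vdev, VirtQueue *vq)
{
    if (virtqueue_drop_all(vq) > 0) {
        virtio_notify(vdev, vq);
    }
}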
/* Reading and writing a structure directly to QEMUFile is *awful*, but
* it is what QEMU has always done by mistake. We can change it sooner
* or later by bumping the version number of the affected vm states.
@@ -788,7 +838,7 @@ typedef struct VirtQueueElementOld {
struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;
-void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
+void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
{
VirtQueueElement *elem;
VirtQueueElementOld data;
@@ -819,7 +869,7 @@ void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
}
- virtqueue_map(elem);
+ virtqueue_map(vdev, elem);
return elem;
}
@@ -878,6 +928,11 @@ static int virtio_validate_features(VirtIODevice *vdev)
{
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+ if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
+ !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
+ return -EFAULT;
+ }
+
if (k->validate_features) {
return k->validate_features(vdev);
} else {
@@ -1861,9 +1916,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
/*
* Some devices migrate VirtQueueElements that have been popped
* from the avail ring but not yet returned to the used ring.
+ * Since max ring size < UINT16_MAX it's safe to use modulo
+         * UINT16_MAX + 1 subtraction: e.g. a last_avail_idx that wrapped
+         * to 3 with used_idx 0xfffe gives (uint16_t)(3 - 0xfffe) = 5
+         * buffers in flight.
*/
- vdev->vq[i].inuse = vdev->vq[i].last_avail_idx -
- vdev->vq[i].used_idx;
+ vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
+ vdev->vq[i].used_idx);
if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
"used_idx 0x%x",
@@ -2001,6 +2058,11 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
vdev->vq[n].shadow_avail_idx = idx;
}
+void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
+{
+ vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
+}
+
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
vdev->vq[n].signalled_used_valid = false;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 83a423c580..4e4562d444 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -184,7 +184,7 @@ struct BlockDriver {
/*
* Flushes all data that was already written to the OS all the way down to
- * the disk (for example raw-posix calls fsync()).
+ * the disk (for example file-posix.c calls fsync()).
*/
int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 64560f61b4..bec9756667 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -628,6 +628,9 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
*/
static inline bool memory_region_is_iommu(MemoryRegion *mr)
{
+ if (mr->alias) {
+ return memory_region_is_iommu(mr->alias);
+ }
return mr->iommu_ops;
}
@@ -1537,6 +1540,11 @@ void stl_le_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val);
void stl_be_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val);
void stq_le_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val);
void stq_be_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val);
+/* address_space_get_iotlb_entry: translate an address into an IOTLB
+ * entry. Should be called from an RCU critical section.
+ */
+IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
+ bool is_write);
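As a hedged illustration of the calling convention the comment requires, a lookup would be wrapped in an RCU read section and the result tested through its perm field; the helper below is hypothetical.

/* Sketch: translate one address under RCU and report whether it maps. */
static bool probe_iotlb(AddressSpace *as, hwaddr addr)
{
    IOMMUTLBEntry entry;

    rcu_read_lock();
    entry = address_space_get_iotlb_entry(as, addr, false);
    rcu_read_unlock();

    return entry.perm != IOMMU_NONE;    /* IOMMU_NONE: no valid mapping */
}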
/* address_space_translate: translate an address range into an address space
* into a MemoryRegion and an address range into that section. Should be
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index 154f3b82f6..4cc3630e61 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -191,10 +191,8 @@ struct AcpiFadtDescriptorRev5_1 {
typedef struct AcpiFadtDescriptorRev5_1 AcpiFadtDescriptorRev5_1;
-enum {
- ACPI_FADT_ARM_USE_PSCI_G_0_2 = 0,
- ACPI_FADT_ARM_PSCI_USE_HVC = 1,
-};
+#define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0)
+#define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1)
/*
* Serial Port Console Redirection Table (SPCR), Rev. 1.02
@@ -290,7 +288,7 @@ typedef struct AcpiMultipleApicTable AcpiMultipleApicTable;
#define ACPI_APIC_XRUPT_SOURCE 8
#define ACPI_APIC_LOCAL_X2APIC 9
#define ACPI_APIC_LOCAL_X2APIC_NMI 10
-#define ACPI_APIC_GENERIC_INTERRUPT 11
+#define ACPI_APIC_GENERIC_CPU_INTERFACE 11
#define ACPI_APIC_GENERIC_DISTRIBUTOR 12
#define ACPI_APIC_GENERIC_MSI_FRAME 13
#define ACPI_APIC_GENERIC_REDISTRIBUTOR 14
@@ -361,7 +359,7 @@ struct AcpiMadtLocalX2ApicNmi {
} QEMU_PACKED;
typedef struct AcpiMadtLocalX2ApicNmi AcpiMadtLocalX2ApicNmi;
-struct AcpiMadtGenericInterrupt {
+struct AcpiMadtGenericCpuInterface {
ACPI_SUB_HEADER_DEF
uint16_t reserved;
uint32_t cpu_interface_number;
@@ -378,7 +376,10 @@ struct AcpiMadtGenericInterrupt {
uint64_t arm_mpidr;
} QEMU_PACKED;
-typedef struct AcpiMadtGenericInterrupt AcpiMadtGenericInterrupt;
+typedef struct AcpiMadtGenericCpuInterface AcpiMadtGenericCpuInterface;
+
+/* GICC CPU Interface Flags */
+#define ACPI_MADT_GICC_ENABLED 1
struct AcpiMadtGenericDistributor {
ACPI_SUB_HEADER_DEF
@@ -427,21 +428,9 @@ typedef struct AcpiMadtGenericTranslator AcpiMadtGenericTranslator;
/*
* Generic Timer Description Table (GTDT)
*/
-
-#define ACPI_GTDT_INTERRUPT_MODE (1 << 0)
-#define ACPI_GTDT_INTERRUPT_POLARITY (1 << 1)
-#define ACPI_GTDT_ALWAYS_ON (1 << 2)
-
-/* Triggering */
-
-#define ACPI_LEVEL_SENSITIVE ((uint8_t) 0x00)
-#define ACPI_EDGE_SENSITIVE ((uint8_t) 0x01)
-
-/* Polarity */
-
-#define ACPI_ACTIVE_HIGH ((uint8_t) 0x00)
-#define ACPI_ACTIVE_LOW ((uint8_t) 0x01)
-#define ACPI_ACTIVE_BOTH ((uint8_t) 0x02)
+#define ACPI_GTDT_INTERRUPT_MODE_LEVEL (0 << 0)
+#define ACPI_GTDT_INTERRUPT_MODE_EDGE (1 << 0)
+#define ACPI_GTDT_CAP_ALWAYS_ON (1 << 2)
struct AcpiGenericTimerTable {
ACPI_TABLE_HEADER_DEF
@@ -638,8 +627,20 @@ struct AcpiDmarHardwareUnit {
} QEMU_PACKED;
typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit;
+/* Type 2: Root Port ATS Capability Reporting Structure */
+struct AcpiDmarRootPortATS {
+ uint16_t type;
+ uint16_t length;
+ uint8_t flags;
+ uint8_t reserved;
+ uint16_t pci_segment;
+ AcpiDmarDeviceScope scope[0];
+} QEMU_PACKED;
+typedef struct AcpiDmarRootPortATS AcpiDmarRootPortATS;
+
/* Masks for Flags field above */
#define ACPI_DMAR_INCLUDE_PCI_ALL 1
+#define ACPI_DMAR_ATSR_ALL_PORTS 1
/*
* Input Output Remapping Table (IORT)
diff --git a/include/hw/acpi/memory_hotplug.h b/include/hw/acpi/memory_hotplug.h
index d2c7452397..db8ebc9cea 100644
--- a/include/hw/acpi/memory_hotplug.h
+++ b/include/hw/acpi/memory_hotplug.h
@@ -30,7 +30,7 @@ typedef struct MemHotplugState {
} MemHotplugState;
void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner,
- MemHotplugState *state);
+ MemHotplugState *state, uint16_t io_base);
void acpi_memory_plug_cb(HotplugHandler *hotplug_dev, MemHotplugState *mem_st,
DeviceState *dev, Error **errp);
@@ -47,11 +47,7 @@ extern const VMStateDescription vmstate_memory_hotplug;
void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list);
-#define MEMORY_HOTPLUG_DEVICE "MHPD"
-#define MEMORY_SLOT_SCAN_METHOD "MSCN"
-#define MEMORY_HOTPLUG_HANDLER_PATH "\\_SB.PCI0." \
- MEMORY_HOTPLUG_DEVICE "." MEMORY_SLOT_SCAN_METHOD
-
-void build_memory_hotplug_aml(Aml *ctx, uint32_t nr_mem,
- uint16_t io_base, uint16_t io_len);
+void build_memory_hotplug_aml(Aml *table, uint32_t nr_mem,
+ const char *res_root,
+ const char *event_handler_method);
#endif
diff --git a/include/hw/acpi/pc-hotplug.h b/include/hw/acpi/pc-hotplug.h
index 6a8d268f84..31bc9191c3 100644
--- a/include/hw/acpi/pc-hotplug.h
+++ b/include/hw/acpi/pc-hotplug.h
@@ -29,29 +29,6 @@
#define PIIX4_CPU_HOTPLUG_IO_BASE 0xaf00
#define CPU_HOTPLUG_RESOURCE_DEVICE PRES
-#define ACPI_MEMORY_HOTPLUG_IO_LEN 24
#define ACPI_MEMORY_HOTPLUG_BASE 0x0a00
-#define MEMORY_SLOTS_NUMBER "MDNR"
-#define MEMORY_HOTPLUG_IO_REGION "HPMR"
-#define MEMORY_SLOT_ADDR_LOW "MRBL"
-#define MEMORY_SLOT_ADDR_HIGH "MRBH"
-#define MEMORY_SLOT_SIZE_LOW "MRLL"
-#define MEMORY_SLOT_SIZE_HIGH "MRLH"
-#define MEMORY_SLOT_PROXIMITY "MPX"
-#define MEMORY_SLOT_ENABLED "MES"
-#define MEMORY_SLOT_INSERT_EVENT "MINS"
-#define MEMORY_SLOT_REMOVE_EVENT "MRMV"
-#define MEMORY_SLOT_EJECT "MEJ"
-#define MEMORY_SLOT_SLECTOR "MSEL"
-#define MEMORY_SLOT_OST_EVENT "MOEV"
-#define MEMORY_SLOT_OST_STATUS "MOSC"
-#define MEMORY_SLOT_LOCK "MLCK"
-#define MEMORY_SLOT_STATUS_METHOD "MRST"
-#define MEMORY_SLOT_CRS_METHOD "MCRS"
-#define MEMORY_SLOT_OST_METHOD "MOST"
-#define MEMORY_SLOT_PROXIMITY_METHOD "MPXM"
-#define MEMORY_SLOT_EJECT_METHOD "MEJ0"
-#define MEMORY_SLOT_NOTIFY_METHOD "MTFY"
-
#endif
diff --git a/include/hw/arm/virt-acpi-build.h b/include/hw/arm/virt-acpi-build.h
deleted file mode 100644
index f5ec749b8f..0000000000
--- a/include/hw/arm/virt-acpi-build.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- *
- * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD.
- *
- * Author: Shannon Zhao <zhaoshenglong@huawei.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2 or later, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef QEMU_VIRT_ACPI_BUILD_H
-#define QEMU_VIRT_ACPI_BUILD_H
-
-#include "qemu-common.h"
-#include "hw/arm/virt.h"
-#include "qemu/notify.h"
-
-#define ACPI_GICC_ENABLED 1
-
-typedef struct VirtGuestInfo {
- int smp_cpus;
- FWCfgState *fw_cfg;
- const MemMapEntry *memmap;
- const int *irqmap;
- bool use_highmem;
- int gic_version;
- bool no_its;
-} VirtGuestInfo;
-
-
-typedef struct VirtGuestInfoState {
- VirtGuestInfo info;
- Notifier machine_done;
-} VirtGuestInfoState;
-
-void virt_acpi_setup(VirtGuestInfo *guest_info);
-
-#endif
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 9650193253..eb1c63d688 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -32,6 +32,9 @@
#include "qemu-common.h"
#include "exec/hwaddr.h"
+#include "qemu/notify.h"
+#include "hw/boards.h"
+#include "hw/arm/arm.h"
#define NUM_GICV2M_SPIS 64
#define NUM_VIRTIO_TRANSPORTS 32
@@ -74,5 +77,41 @@ typedef struct MemMapEntry {
hwaddr size;
} MemMapEntry;
+typedef struct {
+ MachineClass parent;
+ bool disallow_affinity_adjustment;
+ bool no_its;
+ bool no_pmu;
+ bool claim_edge_triggered_timers;
+} VirtMachineClass;
-#endif
+typedef struct {
+ MachineState parent;
+ Notifier machine_done;
+ FWCfgState *fw_cfg;
+ bool secure;
+ bool highmem;
+ int32_t gic_version;
+ struct arm_boot_info bootinfo;
+ const MemMapEntry *memmap;
+ const int *irqmap;
+ int smp_cpus;
+ void *fdt;
+ int fdt_size;
+ uint32_t clock_phandle;
+ uint32_t gic_phandle;
+ uint32_t msi_phandle;
+ bool using_psci;
+} VirtMachineState;
+
+#define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
+#define VIRT_MACHINE(obj) \
+ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE)
+#define VIRT_MACHINE_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_VIRT_MACHINE)
+#define VIRT_MACHINE_CLASS(klass) \
+ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE)
+
+void virt_acpi_setup(VirtMachineState *vms);
+
+#endif /* QEMU_ARM_VIRT_H */
diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
index c4085aa366..2ce611d4c8 100644
--- a/include/hw/i2c/i2c.h
+++ b/include/hw/i2c/i2c.h
@@ -32,14 +32,22 @@ typedef struct I2CSlaveClass
/* Callbacks provided by the device. */
int (*init)(I2CSlave *dev);
- /* Master to slave. */
+ /* Master to slave. Returns non-zero for a NAK, 0 for success. */
int (*send)(I2CSlave *s, uint8_t data);
- /* Slave to master. */
+ /*
+ * Slave to master. This cannot fail, the device should always
+ * return something here. Negative values probably result in 0xff
+ * and a possible log from the driver, and shouldn't be used.
+ */
int (*recv)(I2CSlave *s);
- /* Notify the slave of a bus state change. */
- void (*event)(I2CSlave *s, enum i2c_event event);
+ /*
+     * Notify the slave of a bus state change. For a start event,
+     * returning non-zero NAKs the operation. For other events the
+     * return code is not used and should be zero.
+ */
+ int (*event)(I2CSlave *s, enum i2c_event event);
} I2CSlaveClass;
struct I2CSlave
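Under the revised contract, a slave implementation would look roughly like this hedged sketch; MyDevState, MYDEV and the busy flag are hypothetical.

/* Sketch: NAK a start while busy; other events must return zero. */
static int mydev_event(I2CSlave *s, enum i2c_event event)
{
    MyDevState *d = MYDEV(s);

    if ((event == I2C_START_SEND || event == I2C_START_RECV) && d->busy) {
        return 1;   /* non-zero NAKs the start condition */
    }
    return 0;
}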
diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
index 0c89d9835b..361c07cdc6 100644
--- a/include/hw/i386/x86-iommu.h
+++ b/include/hw/i386/x86-iommu.h
@@ -73,6 +73,7 @@ typedef struct IEC_Notifier IEC_Notifier;
struct X86IOMMUState {
SysBusDevice busdev;
bool intr_supported; /* Whether vIOMMU supports IR */
+ bool dt_supported; /* Whether vIOMMU supports DT */
IommuType type; /* IOMMU type - AMD/Intel */
QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
};
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 056d25e53c..b08451d2c5 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -74,6 +74,9 @@ struct PCIExpressDevice {
/* AER */
uint16_t aer_cap;
PCIEAERLog aer_log;
+
+ /* Offset of ATS capability in config space */
+ uint16_t ats_cap;
};
#define COMPAT_PROP_PCP "power_controller_present"
@@ -120,6 +123,7 @@ void pcie_add_capability(PCIDevice *dev,
void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
+void pcie_ats_init(PCIDevice *dev, uint16_t offset);
extern const VMStateDescription vmstate_pcie_device;
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index c2ee4e2bdb..526802bd31 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -44,7 +44,6 @@ struct PCIEAERLog {
*/
#define PCIE_AER_LOG_MAX_DEFAULT 8
#define PCIE_AER_LOG_MAX_LIMIT 128
-#define PCIE_AER_LOG_MAX_UNSET 0xffff
uint16_t log_max;
/* Error log. log_max-sized array */
@@ -87,7 +86,8 @@ struct PCIEAERErr {
extern const VMStateDescription vmstate_pcie_aer_log;
-int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size);
+int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
+ uint16_t size, Error **errp);
void pcie_aer_exit(PCIDevice *dev);
void pcie_aer_write_config(PCIDevice *dev,
uint32_t addr, uint32_t val, int len);
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index 6e90703cad..30abc11cf1 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -32,6 +32,7 @@ typedef int (*vhost_backend_memslots_limit)(struct vhost_dev *dev);
typedef int (*vhost_net_set_backend_op)(struct vhost_dev *dev,
struct vhost_vring_file *file);
+typedef int (*vhost_net_set_mtu_op)(struct vhost_dev *dev, uint16_t mtu);
typedef int (*vhost_scsi_set_endpoint_op)(struct vhost_dev *dev,
struct vhost_scsi_target *target);
typedef int (*vhost_scsi_clear_endpoint_op)(struct vhost_dev *dev,
@@ -83,6 +84,7 @@ typedef struct VhostOps {
vhost_backend_cleanup vhost_backend_cleanup;
vhost_backend_memslots_limit vhost_backend_memslots_limit;
vhost_net_set_backend_op vhost_net_set_backend;
+ vhost_net_set_mtu_op vhost_net_set_mtu;
vhost_scsi_set_endpoint_op vhost_scsi_set_endpoint;
vhost_scsi_clear_endpoint_op vhost_scsi_clear_endpoint;
vhost_scsi_get_abi_version_op vhost_scsi_get_abi_version;
diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
index 440b4555ea..91ae14d254 100644
--- a/include/hw/virtio/virtio-access.h
+++ b/include/hw/virtio/virtio-access.h
@@ -17,6 +17,7 @@
#define QEMU_VIRTIO_ACCESS_H
#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-bus.h"
#include "exec/address-spaces.h"
#if defined(TARGET_PPC64) || defined(TARGET_ARM)
@@ -40,45 +41,55 @@ static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa)
{
+ AddressSpace *dma_as = vdev->dma_as;
+
if (virtio_access_is_big_endian(vdev)) {
- return lduw_be_phys(&address_space_memory, pa);
+ return lduw_be_phys(dma_as, pa);
}
- return lduw_le_phys(&address_space_memory, pa);
+ return lduw_le_phys(dma_as, pa);
}
static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa)
{
+ AddressSpace *dma_as = vdev->dma_as;
+
if (virtio_access_is_big_endian(vdev)) {
- return ldl_be_phys(&address_space_memory, pa);
+ return ldl_be_phys(dma_as, pa);
}
- return ldl_le_phys(&address_space_memory, pa);
+ return ldl_le_phys(dma_as, pa);
}
static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa)
{
+ AddressSpace *dma_as = vdev->dma_as;
+
if (virtio_access_is_big_endian(vdev)) {
- return ldq_be_phys(&address_space_memory, pa);
+ return ldq_be_phys(dma_as, pa);
}
- return ldq_le_phys(&address_space_memory, pa);
+ return ldq_le_phys(dma_as, pa);
}
static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa,
uint16_t value)
{
+ AddressSpace *dma_as = vdev->dma_as;
+
if (virtio_access_is_big_endian(vdev)) {
- stw_be_phys(&address_space_memory, pa, value);
+ stw_be_phys(dma_as, pa, value);
} else {
- stw_le_phys(&address_space_memory, pa, value);
+ stw_le_phys(dma_as, pa, value);
}
}
static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa,
uint32_t value)
{
+ AddressSpace *dma_as = vdev->dma_as;
+
if (virtio_access_is_big_endian(vdev)) {
- stl_be_phys(&address_space_memory, pa, value);
+ stl_be_phys(dma_as, pa, value);
} else {
- stl_le_phys(&address_space_memory, pa, value);
+ stl_le_phys(dma_as, pa, value);
}
}
diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
index 8a51e2c564..a63c1d216d 100644
--- a/include/hw/virtio/virtio-bus.h
+++ b/include/hw/virtio/virtio-bus.h
@@ -88,6 +88,7 @@ typedef struct VirtioBusClass {
* Note that changing this will break migration for this transport.
*/
bool has_variable_vring_alignment;
+ AddressSpace *(*get_dma_as)(DeviceState *d);
} VirtioBusClass;
struct VirtioBusState {
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 0ced975c57..8ea56a8f60 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -36,6 +36,7 @@ typedef struct virtio_net_conf
int32_t txburst;
char *tx;
uint16_t rx_queue_size;
+ uint16_t mtu;
} virtio_net_conf;
/* Maximum packet size we can receive from tap device: header + 64k */
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index ab0e030cc4..e5541c61f7 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -92,6 +92,7 @@ struct VirtIODevice
char *bus_name;
uint8_t device_endian;
bool use_guest_notifier_mask;
+ AddressSpace *dma_as;
QLIST_HEAD(, VirtQueue) *vector_queues;
};
@@ -170,9 +171,10 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num);
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len, unsigned int idx);
-void virtqueue_map(VirtQueueElement *elem);
+void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem);
void *virtqueue_pop(VirtQueue *vq, size_t sz);
-void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz);
+unsigned int virtqueue_drop_all(VirtQueue *vq);
+void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz);
void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem);
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
unsigned int out_bytes);
@@ -255,7 +257,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
DEFINE_PROP_BIT64("notify_on_empty", _state, _field, \
VIRTIO_F_NOTIFY_ON_EMPTY, true), \
DEFINE_PROP_BIT64("any_layout", _state, _field, \
- VIRTIO_F_ANY_LAYOUT, true)
+ VIRTIO_F_ANY_LAYOUT, true), \
+ DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
+ VIRTIO_F_IOMMU_PLATFORM, false)
hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
@@ -266,6 +270,7 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n);
uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n);
void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n);
+void virtio_queue_update_used_idx(VirtIODevice *vdev, int n);
VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
uint16_t virtio_get_queue_index(VirtQueue *vq);
EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 1638ee57f7..2125829a16 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -186,6 +186,12 @@ enum VMStateFlags {
VMS_MULTIPLY_ELEMENTS = 0x4000,
};
+typedef enum {
+ MIG_PRI_DEFAULT = 0,
+ MIG_PRI_IOMMU, /* Must happen before PCI devices */
+ MIG_PRI_MAX,
+} MigrationPriority;
+
typedef struct {
const char *name;
size_t offset;
@@ -207,6 +213,7 @@ struct VMStateDescription {
int version_id;
int minimum_version_id;
int minimum_version_id_old;
+ MigrationPriority priority;
LoadStateHandler *load_state_old;
int (*pre_load)(void *opaque);
int (*post_load)(void *opaque, int version_id);
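A hedged sketch of how a vmsd would opt into the new ordering; the device name and field are placeholders, not taken from this patch.

/* Sketch: restore this device before PCI devices during migration. */
static const VMStateDescription vmstate_example_iommu = {
    .name = "example-iommu",
    .version_id = 1,
    .minimum_version_id = 1,
    .priority = MIG_PRI_IOMMU,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(ctrl_reg, ExampleIOMMUState),    /* placeholder */
        VMSTATE_END_OF_LIST()
    },
};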
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 5a08efffef..afc1499eb9 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -35,4 +35,6 @@ int vhost_set_vring_enable(NetClientState * nc, int enable);
uint64_t vhost_net_get_acked_features(VHostNetState *net);
+int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
+
#endif
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index e6a60d55fd..12584ed1b7 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -71,6 +71,12 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque);
void qemu_coroutine_enter(Coroutine *coroutine);
/**
+ * Transfer control to a coroutine if it's not active, i.e. not already part
+ * of the call stack of the running coroutine. Otherwise, do nothing.
+ */
+void qemu_coroutine_enter_if_inactive(Coroutine *co);
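A hedged usage sketch: completion code that may run either inside or outside the waiting coroutine can use this to avoid re-entering itself. MyReq is a hypothetical structure.

/* Sketch: wake the waiter only if it is actually suspended. */
static void complete_request(MyReq *req)
{
    req->done = true;
    qemu_coroutine_enter_if_inactive(req->co); /* no-op if already active */
}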
+
+/**
* Transfer control back to a coroutine's caller
*
* This function does not return until the coroutine is re-entered using
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
index e5a2e68b22..be5b066aa4 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -678,6 +678,7 @@
#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PTM
#define PCI_EXT_CAP_DSN_SIZEOF 12
+#define PCI_EXT_CAP_ATS_SIZEOF 8
#define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
/* Advanced Error Reporting */
diff --git a/include/sysemu/cryptodev.h b/include/sysemu/cryptodev.h
index 84526c0d35..a9d0d1ee25 100644
--- a/include/sysemu/cryptodev.h
+++ b/include/sysemu/cryptodev.h
@@ -202,6 +202,8 @@ struct CryptoDevBackend {
Object parent_obj;
bool ready;
+    /* Whether the cryptodev backend is used by virtio-crypto */
+ bool is_used;
CryptoDevBackendConf conf;
};
@@ -295,4 +297,44 @@ int cryptodev_backend_crypto_operation(
void *opaque,
uint32_t queue_index, Error **errp);
+/**
+ * cryptodev_backend_set_used:
+ * @backend: the cryptodev backend object
+ * @used: true or false
+ *
+ * Set whether the cryptodev backend is used by virtio-crypto
+ */
+void cryptodev_backend_set_used(CryptoDevBackend *backend, bool used);
+
+/**
+ * cryptodev_backend_is_used:
+ * @backend: the cryptodev backend object
+ *
+ * Return the status that the cryptodev backend is used
+ * by virtio-crypto or not
+ *
+ * Returns: true on used, or false on not used
+ */
+bool cryptodev_backend_is_used(CryptoDevBackend *backend);
+
+/**
+ * cryptodev_backend_set_ready:
+ * @backend: the cryptodev backend object
+ * @ready: true or false
+ *
+ * Set whether the cryptodev backend is ready; called
+ * by the children of the cryptodev backend interface.
+ */
+void cryptodev_backend_set_ready(CryptoDevBackend *backend, bool ready);
+
+/**
+ * cryptodev_backend_is_ready:
+ * @backend: the cryptodev backend object
+ *
+ * Return whether the cryptodev backend is ready
+ *
+ * Returns: true if ready, false otherwise
+ */
+bool cryptodev_backend_is_ready(CryptoDevBackend *backend);
+
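Taken together, the pair is meant to bracket a device's lifetime; a hedged sketch mirroring the virtio-crypto realize/unrealize calls earlier in this patch:

/* Sketch: claim the backend at realize, release it at unrealize. */
if (cryptodev_backend_is_used(backend)) {
    error_setg(errp, "cryptodev backend is already in use");
    return;
}
cryptodev_backend_set_used(backend, true);
/* ... later, on unrealize: cryptodev_backend_set_used(backend, false); */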
#endif /* CRYPTODEV_H */
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index f80d6d28e8..abb35ca8c9 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -39,6 +39,8 @@ enum ReplayCheckpoint {
};
typedef enum ReplayCheckpoint ReplayCheckpoint;
+typedef struct ReplayNetState ReplayNetState;
+
extern ReplayMode replay_mode;
/* Replay process control functions */
@@ -137,4 +139,14 @@ void replay_char_read_all_save_error(int res);
/*! Writes character read_all execution result into the replay log. */
void replay_char_read_all_save_buf(uint8_t *buf, int offset);
+/* Network */
+
+/*! Registers replay network filter attached to some backend. */
+ReplayNetState *replay_register_net(NetFilterState *nfs);
+/*! Unregisters replay network filter. */
+void replay_unregister_net(ReplayNetState *rns);
+/*! Called to write network packet to the replay log. */
+void replay_net_packet_event(ReplayNetState *rns, unsigned flags,
+ const struct iovec *iov, int iovcnt);
+
#endif
diff --git a/memory.c b/memory.c
index 33110e9698..2bfc37f65c 100644
--- a/memory.c
+++ b/memory.c
@@ -1603,6 +1603,11 @@ static void memory_region_update_iommu_notify_flags(MemoryRegion *mr)
void memory_region_register_iommu_notifier(MemoryRegion *mr,
IOMMUNotifier *n)
{
+ if (mr->alias) {
+ memory_region_register_iommu_notifier(mr->alias, n);
+ return;
+ }
+
/* We need to register for at least one bitfield */
assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
QLIST_INSERT_HEAD(&mr->iommu_notify, n, node);
@@ -1643,6 +1648,10 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
IOMMUNotifier *n)
{
+ if (mr->alias) {
+ memory_region_unregister_iommu_notifier(mr->alias, n);
+ return;
+ }
QLIST_REMOVE(n, node);
memory_region_update_iommu_notify_flags(mr);
}
diff --git a/migration/savevm.c b/migration/savevm.c
index 0363372acc..f9c06e9f96 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -532,6 +532,34 @@ static int calculate_compat_instance_id(const char *idstr)
return instance_id;
}
+static inline MigrationPriority save_state_priority(SaveStateEntry *se)
+{
+ if (se->vmsd) {
+ return se->vmsd->priority;
+ }
+ return MIG_PRI_DEFAULT;
+}
+
+static void savevm_state_handler_insert(SaveStateEntry *nse)
+{
+ MigrationPriority priority = save_state_priority(nse);
+ SaveStateEntry *se;
+
+ assert(priority <= MIG_PRI_MAX);
+
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+ if (save_state_priority(se) < priority) {
+ break;
+ }
+ }
+
+ if (se) {
+ QTAILQ_INSERT_BEFORE(se, nse, entry);
+ } else {
+ QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
+ }
+}
+
/* TODO: Individual devices generally have very little idea about the rest
of the system, so instance_id should be removed/replaced.
Meanwhile pass -1 as instance_id if you do not already have a clearly
@@ -578,8 +606,7 @@ int register_savevm_live(DeviceState *dev,
se->instance_id = instance_id;
}
assert(!se->compat || se->instance_id == 0);
- /* add at the end of list */
- QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
+ savevm_state_handler_insert(se);
return 0;
}
@@ -662,8 +689,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
se->instance_id = instance_id;
}
assert(!se->compat || se->instance_id == 0);
- /* add at the end of list */
- QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
+ savevm_state_handler_insert(se);
return 0;
}
diff --git a/net/Makefile.objs b/net/Makefile.objs
index 2a80df5fa7..2e2fd43014 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -19,3 +19,4 @@ common-obj-y += filter-mirror.o
common-obj-y += colo-compare.o
common-obj-y += colo.o
common-obj-y += filter-rewriter.o
+common-obj-y += filter-replay.o
diff --git a/net/filter-replay.c b/net/filter-replay.c
new file mode 100644
index 0000000000..cff65f86e5
--- /dev/null
+++ b/net/filter-replay.c
@@ -0,0 +1,92 @@
+/*
+ * filter-replay.c
+ *
+ * Copyright (c) 2010-2016 Institute for System Programming
+ * of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "clients.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/iov.h"
+#include "qemu/log.h"
+#include "qemu/timer.h"
+#include "qapi/visitor.h"
+#include "net/filter.h"
+#include "sysemu/replay.h"
+
+#define TYPE_FILTER_REPLAY "filter-replay"
+
+#define FILTER_REPLAY(obj) \
+ OBJECT_CHECK(NetFilterReplayState, (obj), TYPE_FILTER_REPLAY)
+
+struct NetFilterReplayState {
+ NetFilterState nfs;
+ ReplayNetState *rns;
+};
+typedef struct NetFilterReplayState NetFilterReplayState;
+
+static ssize_t filter_replay_receive_iov(NetFilterState *nf,
+ NetClientState *sndr,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt, NetPacketSent *sent_cb)
+{
+ NetFilterReplayState *nfrs = FILTER_REPLAY(nf);
+ switch (replay_mode) {
+ case REPLAY_MODE_RECORD:
+ if (nf->netdev == sndr) {
+ replay_net_packet_event(nfrs->rns, flags, iov, iovcnt);
+ return iov_size(iov, iovcnt);
+ }
+ return 0;
+ case REPLAY_MODE_PLAY:
+ /* Drop all packets in replay mode.
+ Packets from the log will be injected by the replay module. */
+ return iov_size(iov, iovcnt);
+ default:
+ /* Pass all the packets. */
+ return 0;
+ }
+}
+
+static void filter_replay_instance_init(Object *obj)
+{
+ NetFilterReplayState *nfrs = FILTER_REPLAY(obj);
+ nfrs->rns = replay_register_net(&nfrs->nfs);
+}
+
+static void filter_replay_instance_finalize(Object *obj)
+{
+ NetFilterReplayState *nfrs = FILTER_REPLAY(obj);
+ replay_unregister_net(nfrs->rns);
+}
+
+static void filter_replay_class_init(ObjectClass *oc, void *data)
+{
+ NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+ nfc->receive_iov = filter_replay_receive_iov;
+}
+
+static const TypeInfo filter_replay_info = {
+ .name = TYPE_FILTER_REPLAY,
+ .parent = TYPE_NETFILTER,
+ .class_init = filter_replay_class_init,
+ .instance_init = filter_replay_instance_init,
+ .instance_finalize = filter_replay_instance_finalize,
+ .instance_size = sizeof(NetFilterReplayState),
+};
+
+static void filter_replay_register_types(void)
+{
+ type_register_static(&filter_replay_info);
+}
+
+type_init(filter_replay_register_types);
diff --git a/qemu-img.c b/qemu-img.c
index 6949b73ca5..5df66fe661 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3559,20 +3559,23 @@ static void bench_cb(void *opaque, int ret)
}
while (b->n > b->in_flight && b->in_flight < b->nrreq) {
+ int64_t offset = b->offset;
+ /* blk_aio_* might look for completed I/Os and kick bench_cb
+ * again, so make sure this operation is counted by in_flight
+ * and b->offset is ready for the next submission.
+ */
+ b->in_flight++;
+ b->offset += b->step;
+ b->offset %= b->image_size;
if (b->write) {
- acb = blk_aio_pwritev(b->blk, b->offset, b->qiov, 0,
- bench_cb, b);
+ acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
} else {
- acb = blk_aio_preadv(b->blk, b->offset, b->qiov, 0,
- bench_cb, b);
+ acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
}
if (!acb) {
error_report("Failed to issue request");
exit(EXIT_FAILURE);
}
- b->in_flight++;
- b->offset += b->step;
- b->offset %= b->image_size;
}
}
diff --git a/replay/Makefile.objs b/replay/Makefile.objs
index c8ad3ebb89..b2afd4030a 100644
--- a/replay/Makefile.objs
+++ b/replay/Makefile.objs
@@ -5,3 +5,4 @@ common-obj-y += replay-time.o
common-obj-y += replay-input.o
common-obj-y += replay-char.o
common-obj-y += replay-snapshot.o
+common-obj-y += replay-net.o
diff --git a/replay/replay-events.c b/replay/replay-events.c
index c513913671..94a6dcccfc 100644
--- a/replay/replay-events.c
+++ b/replay/replay-events.c
@@ -54,6 +54,9 @@ static void replay_run_event(Event *event)
case REPLAY_ASYNC_EVENT_BLOCK:
aio_bh_call(event->opaque);
break;
+ case REPLAY_ASYNC_EVENT_NET:
+ replay_event_net_run(event->opaque);
+ break;
default:
error_report("Replay: invalid async event ID (%d) in the queue",
event->event_kind);
@@ -189,6 +192,9 @@ static void replay_save_event(Event *event, int checkpoint)
case REPLAY_ASYNC_EVENT_BLOCK:
replay_put_qword(event->id);
break;
+ case REPLAY_ASYNC_EVENT_NET:
+ replay_event_net_save(event->opaque);
+ break;
default:
error_report("Unknown ID %" PRId64 " of replay event", event->id);
exit(1);
@@ -252,6 +258,11 @@ static Event *replay_read_event(int checkpoint)
read_id = replay_get_qword();
}
break;
+ case REPLAY_ASYNC_EVENT_NET:
+ event = g_malloc0(sizeof(Event));
+ event->event_kind = read_event_kind;
+ event->opaque = replay_event_net_load();
+ return event;
default:
error_report("Unknown ID %d of replay event", read_event_kind);
exit(1);
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index 9117e442d0..c26d0795f2 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -50,6 +50,7 @@ enum ReplayAsyncEventKind {
REPLAY_ASYNC_EVENT_INPUT_SYNC,
REPLAY_ASYNC_EVENT_CHAR_READ,
REPLAY_ASYNC_EVENT_BLOCK,
+ REPLAY_ASYNC_EVENT_NET,
REPLAY_ASYNC_COUNT
};
@@ -161,6 +162,15 @@ void replay_event_char_read_save(void *opaque);
/*! Reads char event read from the file. */
void *replay_event_char_read_load(void);
+/* Network devices */
+
+/*! Called to run network event. */
+void replay_event_net_run(void *opaque);
+/*! Writes network event to the file. */
+void replay_event_net_save(void *opaque);
+/*! Reads network event from the file. */
+void *replay_event_net_load(void);
+
/* VMState-related functions */
/* Registers replay VMState.
diff --git a/replay/replay-net.c b/replay/replay-net.c
new file mode 100644
index 0000000000..80b7054156
--- /dev/null
+++ b/replay/replay-net.c
@@ -0,0 +1,102 @@
+/*
+ * replay-net.c
+ *
+ * Copyright (c) 2010-2016 Institute for System Programming
+ * of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "sysemu/replay.h"
+#include "replay-internal.h"
+#include "sysemu/sysemu.h"
+#include "net/net.h"
+#include "net/filter.h"
+#include "qemu/iov.h"
+
+struct ReplayNetState {
+ NetFilterState *nfs;
+ int id;
+};
+
+typedef struct NetEvent {
+ uint8_t id;
+ uint32_t flags;
+ uint8_t *data;
+ size_t size;
+} NetEvent;
+
+static NetFilterState **network_filters;
+static int network_filters_count;
+
+ReplayNetState *replay_register_net(NetFilterState *nfs)
+{
+ ReplayNetState *rns = g_new0(ReplayNetState, 1);
+ rns->nfs = nfs;
+ rns->id = network_filters_count++;
+ network_filters = g_realloc(network_filters,
+ network_filters_count
+ * sizeof(*network_filters));
+ network_filters[network_filters_count - 1] = nfs;
+ return rns;
+}
+
+void replay_unregister_net(ReplayNetState *rns)
+{
+ network_filters[rns->id] = NULL;
+ g_free(rns);
+}
+
+void replay_net_packet_event(ReplayNetState *rns, unsigned flags,
+ const struct iovec *iov, int iovcnt)
+{
+ NetEvent *event = g_new(NetEvent, 1);
+ event->flags = flags;
+ event->data = g_malloc(iov_size(iov, iovcnt));
+ event->size = iov_size(iov, iovcnt);
+ event->id = rns->id;
+ iov_to_buf(iov, iovcnt, 0, event->data, event->size);
+
+ replay_add_event(REPLAY_ASYNC_EVENT_NET, event, NULL, 0);
+}
+
+void replay_event_net_run(void *opaque)
+{
+ NetEvent *event = opaque;
+ struct iovec iov = {
+ .iov_base = (void *)event->data,
+ .iov_len = event->size
+ };
+
+ assert(event->id < network_filters_count);
+
+ qemu_netfilter_pass_to_next(network_filters[event->id]->netdev,
+ event->flags, &iov, 1, network_filters[event->id]);
+
+ g_free(event->data);
+ g_free(event);
+}
+
+void replay_event_net_save(void *opaque)
+{
+ NetEvent *event = opaque;
+
+ replay_put_byte(event->id);
+ replay_put_dword(event->flags);
+ replay_put_array(event->data, event->size);
+}
+
+void *replay_event_net_load(void)
+{
+ NetEvent *event = g_new(NetEvent, 1);
+
+ event->id = replay_get_byte();
+ event->flags = replay_get_dword();
+ replay_get_array_alloc(&event->data, &event->size);
+
+ return event;
+}
diff --git a/replay/replay.c b/replay/replay.c
index c797aeae8a..7f27cf17b0 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -21,7 +21,7 @@
/* Current version of the replay mechanism.
Increase it when file format changes. */
-#define REPLAY_VERSION 0xe02004
+#define REPLAY_VERSION 0xe02005
/* Size of replay log header */
#define HEADER_SIZE (sizeof(uint32_t) + sizeof(uint64_t))
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 3aeac87614..d352c97389 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -27,6 +27,14 @@
#ifndef MIPS_TCG_TARGET_H
#define MIPS_TCG_TARGET_H
+#if _MIPS_SIM == _ABIO32
+# define TCG_TARGET_REG_BITS 32
+#elif _MIPS_SIM == _ABIN32 || _MIPS_SIM == _ABI64
+# define TCG_TARGET_REG_BITS 64
+#else
+# error "Unknown ABI"
+#endif
+
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
#define TCG_TARGET_NB_REGS 32
@@ -70,9 +78,13 @@ typedef enum {
} TCGReg;
/* used for function call generation */
-#define TCG_TARGET_STACK_ALIGN 8
-#define TCG_TARGET_CALL_STACK_OFFSET 16
-#define TCG_TARGET_CALL_ALIGN_ARGS 1
+#define TCG_TARGET_STACK_ALIGN 16
+#if _MIPS_SIM == _ABIO32
+# define TCG_TARGET_CALL_STACK_OFFSET 16
+#else
+# define TCG_TARGET_CALL_STACK_OFFSET 0
+#endif
+#define TCG_TARGET_CALL_ALIGN_ARGS 1
/* MOVN/MOVZ instructions detection */
#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
@@ -117,21 +129,61 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions)
#define TCG_TARGET_HAS_muluh_i32 1
#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_extrl_i64_i32 1
+#define TCG_TARGET_HAS_extrh_i64_i32 1
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muls2_i64 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#endif
/* optional instructions detected at runtime */
#define TCG_TARGET_HAS_movcond_i32 use_movnz_instructions
#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap32_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions
+#define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions
+#endif
+
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */
#define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */
#define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_neg_i64 0 /* sub rd, zero, rt */
+#define TCG_TARGET_HAS_ext8u_i64 0 /* andi rt, rs, 0xff */
+#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
+#endif
+
#ifdef __OpenBSD__
#include <machine/sysarch.h>
#else
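
Everything else in this header now keys off the _MIPS_SIM probe added above: o32 keeps 32-bit TCG registers, while n32 and n64 get 64-bit ones. A quick standalone check of which branch a given toolchain takes (it only compiles with a MIPS compiler, which provides _MIPS_SIM and the _ABI* constants):

```c
#include <stdio.h>

#if _MIPS_SIM == _ABIO32
# define REG_BITS 32
#elif _MIPS_SIM == _ABIN32 || _MIPS_SIM == _ABI64
# define REG_BITS 64
#else
# error "Unknown ABI"
#endif

int main(void)
{
    /* e.g. mips64-linux-gnu-gcc -mabi=64 prints 64 here */
    printf("TCG_TARGET_REG_BITS = %d\n", REG_BITS);
    return 0;
}
```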
diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
index abce6026f8..5b2fe988de 100644
--- a/tcg/mips/tcg-target.inc.c
+++ b/tcg/mips/tcg-target.inc.c
@@ -32,8 +32,16 @@
# define MIPS_BE 0
#endif
-#define LO_OFF (MIPS_BE * 4)
-#define HI_OFF (4 - LO_OFF)
+#if TCG_TARGET_REG_BITS == 32
+# define LO_OFF (MIPS_BE * 4)
+# define HI_OFF (4 - LO_OFF)
+#else
+/* To assert at compile-time that these values are never used
+ for TCG_TARGET_REG_BITS == 64. */
+/* extern */ int link_error(void);
+# define LO_OFF link_error()
+# define HI_OFF link_error()
+#endif
#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
@@ -74,6 +82,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#define TCG_TMP0 TCG_REG_AT
#define TCG_TMP1 TCG_REG_T9
+#define TCG_TMP2 TCG_REG_T8
+#define TCG_TMP3 TCG_REG_T7
/* check if we really need so many registers :P */
static const int tcg_target_reg_alloc_order[] = {
@@ -89,10 +99,6 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_S8,
/* Call clobbered registers. */
- TCG_REG_T0,
- TCG_REG_T1,
- TCG_REG_T2,
- TCG_REG_T3,
TCG_REG_T4,
TCG_REG_T5,
TCG_REG_T6,
@@ -103,17 +109,27 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_V0,
/* Argument registers, opposite order of allocation. */
+ TCG_REG_T3,
+ TCG_REG_T2,
+ TCG_REG_T1,
+ TCG_REG_T0,
TCG_REG_A3,
TCG_REG_A2,
TCG_REG_A1,
TCG_REG_A0,
};
-static const TCGReg tcg_target_call_iarg_regs[4] = {
+static const TCGReg tcg_target_call_iarg_regs[] = {
TCG_REG_A0,
TCG_REG_A1,
TCG_REG_A2,
- TCG_REG_A3
+ TCG_REG_A3,
+#if _MIPS_SIM == _ABIN32 || _MIPS_SIM == _ABI64
+ TCG_REG_T0,
+ TCG_REG_T1,
+ TCG_REG_T2,
+ TCG_REG_T3,
+#endif
};
static const TCGReg tcg_target_call_oarg_regs[2] = {
@@ -122,6 +138,9 @@ static const TCGReg tcg_target_call_oarg_regs[2] = {
};
static tcg_insn_unit *tb_ret_addr;
+static tcg_insn_unit *bswap32_addr;
+static tcg_insn_unit *bswap32u_addr;
+static tcg_insn_unit *bswap64_addr;
static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target)
{
@@ -177,17 +196,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
ct->ct |= TCG_CT_REG;
tcg_regset_set(ct->u.regs, 0xffffffff);
break;
- case 'L': /* qemu_ld output arg constraint */
- ct->ct |= TCG_CT_REG;
- tcg_regset_set(ct->u.regs, 0xffffffff);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_V0);
- break;
- case 'l': /* qemu_ld input arg constraint */
+ case 'L': /* qemu_ld input arg constraint */
ct->ct |= TCG_CT_REG;
tcg_regset_set(ct->u.regs, 0xffffffff);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
#if defined(CONFIG_SOFTMMU)
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
}
#endif
@@ -197,11 +211,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set(ct->u.regs, 0xffffffff);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
#if defined(CONFIG_SOFTMMU)
- if (TARGET_LONG_BITS == 32) {
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
- } else {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
+ } else {
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
}
#endif
break;
@@ -256,81 +270,118 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
/* instruction opcodes */
typedef enum {
- OPC_J = 0x02 << 26,
- OPC_JAL = 0x03 << 26,
- OPC_BEQ = 0x04 << 26,
- OPC_BNE = 0x05 << 26,
- OPC_BLEZ = 0x06 << 26,
- OPC_BGTZ = 0x07 << 26,
- OPC_ADDIU = 0x09 << 26,
- OPC_SLTI = 0x0A << 26,
- OPC_SLTIU = 0x0B << 26,
- OPC_ANDI = 0x0C << 26,
- OPC_ORI = 0x0D << 26,
- OPC_XORI = 0x0E << 26,
- OPC_LUI = 0x0F << 26,
- OPC_LB = 0x20 << 26,
- OPC_LH = 0x21 << 26,
- OPC_LW = 0x23 << 26,
- OPC_LBU = 0x24 << 26,
- OPC_LHU = 0x25 << 26,
- OPC_LWU = 0x27 << 26,
- OPC_SB = 0x28 << 26,
- OPC_SH = 0x29 << 26,
- OPC_SW = 0x2B << 26,
-
- OPC_SPECIAL = 0x00 << 26,
- OPC_SLL = OPC_SPECIAL | 0x00,
- OPC_SRL = OPC_SPECIAL | 0x02,
- OPC_ROTR = OPC_SPECIAL | (0x01 << 21) | 0x02,
- OPC_SRA = OPC_SPECIAL | 0x03,
- OPC_SLLV = OPC_SPECIAL | 0x04,
- OPC_SRLV = OPC_SPECIAL | 0x06,
- OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06,
- OPC_SRAV = OPC_SPECIAL | 0x07,
- OPC_JR_R5 = OPC_SPECIAL | 0x08,
- OPC_JALR = OPC_SPECIAL | 0x09,
- OPC_MOVZ = OPC_SPECIAL | 0x0A,
- OPC_MOVN = OPC_SPECIAL | 0x0B,
- OPC_SYNC = OPC_SPECIAL | 0x0F,
- OPC_MFHI = OPC_SPECIAL | 0x10,
- OPC_MFLO = OPC_SPECIAL | 0x12,
- OPC_MULT = OPC_SPECIAL | 0x18,
- OPC_MUL_R6 = OPC_SPECIAL | (0x02 << 6) | 0x18,
- OPC_MUH = OPC_SPECIAL | (0x03 << 6) | 0x18,
- OPC_MULTU = OPC_SPECIAL | 0x19,
- OPC_MULU = OPC_SPECIAL | (0x02 << 6) | 0x19,
- OPC_MUHU = OPC_SPECIAL | (0x03 << 6) | 0x19,
- OPC_DIV = OPC_SPECIAL | 0x1A,
- OPC_DIV_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1A,
- OPC_MOD = OPC_SPECIAL | (0x03 << 6) | 0x1A,
- OPC_DIVU = OPC_SPECIAL | 0x1B,
- OPC_DIVU_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1B,
- OPC_MODU = OPC_SPECIAL | (0x03 << 6) | 0x1B,
- OPC_ADDU = OPC_SPECIAL | 0x21,
- OPC_SUBU = OPC_SPECIAL | 0x23,
- OPC_AND = OPC_SPECIAL | 0x24,
- OPC_OR = OPC_SPECIAL | 0x25,
- OPC_XOR = OPC_SPECIAL | 0x26,
- OPC_NOR = OPC_SPECIAL | 0x27,
- OPC_SLT = OPC_SPECIAL | 0x2A,
- OPC_SLTU = OPC_SPECIAL | 0x2B,
- OPC_SELEQZ = OPC_SPECIAL | 0x35,
- OPC_SELNEZ = OPC_SPECIAL | 0x37,
-
- OPC_REGIMM = 0x01 << 26,
- OPC_BLTZ = OPC_REGIMM | (0x00 << 16),
- OPC_BGEZ = OPC_REGIMM | (0x01 << 16),
-
- OPC_SPECIAL2 = 0x1c << 26,
- OPC_MUL_R5 = OPC_SPECIAL2 | 0x002,
-
- OPC_SPECIAL3 = 0x1f << 26,
- OPC_EXT = OPC_SPECIAL3 | 0x000,
- OPC_INS = OPC_SPECIAL3 | 0x004,
- OPC_WSBH = OPC_SPECIAL3 | 0x0a0,
- OPC_SEB = OPC_SPECIAL3 | 0x420,
- OPC_SEH = OPC_SPECIAL3 | 0x620,
+ OPC_J = 002 << 26,
+ OPC_JAL = 003 << 26,
+ OPC_BEQ = 004 << 26,
+ OPC_BNE = 005 << 26,
+ OPC_BLEZ = 006 << 26,
+ OPC_BGTZ = 007 << 26,
+ OPC_ADDIU = 011 << 26,
+ OPC_SLTI = 012 << 26,
+ OPC_SLTIU = 013 << 26,
+ OPC_ANDI = 014 << 26,
+ OPC_ORI = 015 << 26,
+ OPC_XORI = 016 << 26,
+ OPC_LUI = 017 << 26,
+ OPC_DADDIU = 031 << 26,
+ OPC_LB = 040 << 26,
+ OPC_LH = 041 << 26,
+ OPC_LW = 043 << 26,
+ OPC_LBU = 044 << 26,
+ OPC_LHU = 045 << 26,
+ OPC_LWU = 047 << 26,
+ OPC_SB = 050 << 26,
+ OPC_SH = 051 << 26,
+ OPC_SW = 053 << 26,
+ OPC_LD = 067 << 26,
+ OPC_SD = 077 << 26,
+
+ OPC_SPECIAL = 000 << 26,
+ OPC_SLL = OPC_SPECIAL | 000,
+ OPC_SRL = OPC_SPECIAL | 002,
+ OPC_ROTR = OPC_SPECIAL | 002 | (1 << 21),
+ OPC_SRA = OPC_SPECIAL | 003,
+ OPC_SLLV = OPC_SPECIAL | 004,
+ OPC_SRLV = OPC_SPECIAL | 006,
+ OPC_ROTRV = OPC_SPECIAL | 006 | 0100,
+ OPC_SRAV = OPC_SPECIAL | 007,
+ OPC_JR_R5 = OPC_SPECIAL | 010,
+ OPC_JALR = OPC_SPECIAL | 011,
+ OPC_MOVZ = OPC_SPECIAL | 012,
+ OPC_MOVN = OPC_SPECIAL | 013,
+ OPC_SYNC = OPC_SPECIAL | 017,
+ OPC_MFHI = OPC_SPECIAL | 020,
+ OPC_MFLO = OPC_SPECIAL | 022,
+ OPC_DSLLV = OPC_SPECIAL | 024,
+ OPC_DSRLV = OPC_SPECIAL | 026,
+ OPC_DROTRV = OPC_SPECIAL | 026 | 0100,
+ OPC_DSRAV = OPC_SPECIAL | 027,
+ OPC_MULT = OPC_SPECIAL | 030,
+ OPC_MUL_R6 = OPC_SPECIAL | 030 | 0200,
+ OPC_MUH = OPC_SPECIAL | 030 | 0300,
+ OPC_MULTU = OPC_SPECIAL | 031,
+ OPC_MULU = OPC_SPECIAL | 031 | 0200,
+ OPC_MUHU = OPC_SPECIAL | 031 | 0300,
+ OPC_DIV = OPC_SPECIAL | 032,
+ OPC_DIV_R6 = OPC_SPECIAL | 032 | 0200,
+ OPC_MOD = OPC_SPECIAL | 032 | 0300,
+ OPC_DIVU = OPC_SPECIAL | 033,
+ OPC_DIVU_R6 = OPC_SPECIAL | 033 | 0200,
+ OPC_MODU = OPC_SPECIAL | 033 | 0300,
+ OPC_DMULT = OPC_SPECIAL | 034,
+ OPC_DMUL = OPC_SPECIAL | 034 | 0200,
+ OPC_DMUH = OPC_SPECIAL | 034 | 0300,
+ OPC_DMULTU = OPC_SPECIAL | 035,
+ OPC_DMULU = OPC_SPECIAL | 035 | 0200,
+ OPC_DMUHU = OPC_SPECIAL | 035 | 0300,
+ OPC_DDIV = OPC_SPECIAL | 036,
+ OPC_DDIV_R6 = OPC_SPECIAL | 036 | 0200,
+ OPC_DMOD = OPC_SPECIAL | 036 | 0300,
+ OPC_DDIVU = OPC_SPECIAL | 037,
+ OPC_DDIVU_R6 = OPC_SPECIAL | 037 | 0200,
+ OPC_DMODU = OPC_SPECIAL | 037 | 0300,
+ OPC_ADDU = OPC_SPECIAL | 041,
+ OPC_SUBU = OPC_SPECIAL | 043,
+ OPC_AND = OPC_SPECIAL | 044,
+ OPC_OR = OPC_SPECIAL | 045,
+ OPC_XOR = OPC_SPECIAL | 046,
+ OPC_NOR = OPC_SPECIAL | 047,
+ OPC_SLT = OPC_SPECIAL | 052,
+ OPC_SLTU = OPC_SPECIAL | 053,
+ OPC_DADDU = OPC_SPECIAL | 055,
+ OPC_DSUBU = OPC_SPECIAL | 057,
+ OPC_SELEQZ = OPC_SPECIAL | 065,
+ OPC_SELNEZ = OPC_SPECIAL | 067,
+ OPC_DSLL = OPC_SPECIAL | 070,
+ OPC_DSRL = OPC_SPECIAL | 072,
+ OPC_DROTR = OPC_SPECIAL | 072 | (1 << 21),
+ OPC_DSRA = OPC_SPECIAL | 073,
+ OPC_DSLL32 = OPC_SPECIAL | 074,
+ OPC_DSRL32 = OPC_SPECIAL | 076,
+ OPC_DROTR32 = OPC_SPECIAL | 076 | (1 << 21),
+ OPC_DSRA32 = OPC_SPECIAL | 077,
+
+ OPC_REGIMM = 001 << 26,
+ OPC_BLTZ = OPC_REGIMM | (000 << 16),
+ OPC_BGEZ = OPC_REGIMM | (001 << 16),
+
+ OPC_SPECIAL2 = 034 << 26,
+ OPC_MUL_R5 = OPC_SPECIAL2 | 002,
+
+ OPC_SPECIAL3 = 037 << 26,
+ OPC_EXT = OPC_SPECIAL3 | 000,
+ OPC_DEXTM = OPC_SPECIAL3 | 001,
+ OPC_DEXTU = OPC_SPECIAL3 | 002,
+ OPC_DEXT = OPC_SPECIAL3 | 003,
+ OPC_INS = OPC_SPECIAL3 | 004,
+ OPC_DINSM = OPC_SPECIAL3 | 005,
+ OPC_DINSU = OPC_SPECIAL3 | 006,
+ OPC_DINS = OPC_SPECIAL3 | 007,
+ OPC_WSBH = OPC_SPECIAL3 | 00240,
+ OPC_DSBH = OPC_SPECIAL3 | 00244,
+ OPC_DSHD = OPC_SPECIAL3 | 00544,
+ OPC_SEB = OPC_SPECIAL3 | 02040,
+ OPC_SEH = OPC_SPECIAL3 | 03040,
/* MIPS r6 doesn't have JR, JALR should be used instead */
OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5,
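
The rewrite of the opcode enum from hex to octal is purely cosmetic: MIPS instruction words decompose into 6- and 5-bit fields, which octal digits track much better than hex digits do. A few spot checks that the constants are unchanged (values copied from the enum above):

```c
#include <assert.h>

int main(void)
{
    assert((011 << 26) == (0x09 << 26));          /* OPC_ADDIU */
    assert((040 << 26) == (0x20 << 26));          /* OPC_LB */
    assert((030 | 0200) == ((0x02 << 6) | 0x18)); /* OPC_MUL_R6 */
    assert(00240 == 0x0a0);                       /* OPC_WSBH */
    assert(02040 == 0x420);                       /* OPC_SEB */
    return 0;
}
```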
@@ -348,6 +399,12 @@ typedef enum {
OPC_SYNC_ACQUIRE = OPC_SYNC | 0x11 << 5,
OPC_SYNC_RELEASE = OPC_SYNC | 0x12 << 5,
OPC_SYNC_RMB = OPC_SYNC | 0x13 << 5,
+
+ /* Aliases for convenience. */
+ ALIAS_PADD = sizeof(void *) == 4 ? OPC_ADDU : OPC_DADDU,
+ ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
+ ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
+ ? OPC_SRL : OPC_DSRL,
} MIPSInsn;
/*
@@ -396,6 +453,21 @@ static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt,
tcg_out32(s, inst);
}
+static inline void tcg_out_opc_bf64(TCGContext *s, MIPSInsn opc, MIPSInsn opm,
+ MIPSInsn oph, TCGReg rt, TCGReg rs,
+ int msb, int lsb)
+{
+ if (lsb >= 32) {
+ opc = oph;
+ msb -= 32;
+ lsb -= 32;
+ } else if (msb >= 32) {
+ opc = opm;
+ msb -= 32;
+ }
+ tcg_out_opc_bf(s, opc, rt, rs, msb, lsb);
+}
+
/*
* Type branch
*/
@@ -426,6 +498,18 @@ static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc,
}
+static void tcg_out_opc_sa64(TCGContext *s, MIPSInsn opc1, MIPSInsn opc2,
+ TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ int32_t inst;
+
+ inst = (sa & 32 ? opc2 : opc1);
+ inst |= (rt & 0x1F) << 16;
+ inst |= (rd & 0x1F) << 11;
+ inst |= (sa & 0x1F) << 6;
+ tcg_out32(s, inst);
+}
+
/*
* Type jump.
* Returns true if the branch was in range and the insn was emitted.
@@ -454,28 +538,59 @@ static inline void tcg_out_nop(TCGContext *s)
tcg_out32(s, 0);
}
+static inline void tcg_out_dsll(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ tcg_out_opc_sa64(s, OPC_DSLL, OPC_DSLL32, rd, rt, sa);
+}
+
+static inline void tcg_out_dsrl(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ tcg_out_opc_sa64(s, OPC_DSRL, OPC_DSRL32, rd, rt, sa);
+}
+
+static inline void tcg_out_dsra(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ tcg_out_opc_sa64(s, OPC_DSRA, OPC_DSRA32, rd, rt, sa);
+}
+
static inline void tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
{
/* Simple reg-reg move, optimising out the 'do nothing' case */
if (ret != arg) {
- tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO);
+ tcg_out_opc_reg(s, OPC_OR, ret, arg, TCG_REG_ZERO);
}
}
-static inline void tcg_out_movi(TCGContext *s, TCGType type,
- TCGReg reg, tcg_target_long arg)
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
{
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
+ arg = (int32_t)arg;
+ }
if (arg == (int16_t)arg) {
- tcg_out_opc_imm(s, OPC_ADDIU, reg, TCG_REG_ZERO, arg);
- } else if (arg == (uint16_t)arg) {
- tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg);
+ tcg_out_opc_imm(s, OPC_ADDIU, ret, TCG_REG_ZERO, arg);
+ return;
+ }
+ if (arg == (uint16_t)arg) {
+ tcg_out_opc_imm(s, OPC_ORI, ret, TCG_REG_ZERO, arg);
+ return;
+ }
+ if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
+ tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16);
} else {
- tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16);
- if (arg & 0xffff) {
- tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff);
+ tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1);
+ if (arg & 0xffff0000ull) {
+ tcg_out_dsll(s, ret, ret, 16);
+ tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg >> 16);
+ tcg_out_dsll(s, ret, ret, 16);
+ } else {
+ tcg_out_dsll(s, ret, ret, 32);
}
}
+ if (arg & 0xffff) {
+ tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg & 0xffff);
+ }
}
static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
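
For constants that no longer fit in 32 bits, the new tcg_out_movi builds the value top-down: materialize the high 32 bits recursively, then shift them up with one or two DSLL/ORI steps, then OR in the low 16 bits. A host-side mirror of that decomposition, useful for convincing yourself the emitted LUI/ORI/DSLL sequence reproduces the constant (a sketch of the I64 path only; the I32 truncation at the top of the function is omitted):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Perform the same ADDIU/ORI/LUI/DSLL steps as tcg_out_movi above on a
   plain integer and check that the constant is reproduced. */
static uint64_t movi64(int64_t arg)
{
    uint64_t r;

    if (arg == (int16_t)arg) {
        return (uint64_t)arg;              /* ADDIU ret, zero, arg (sign-ext) */
    }
    if (arg == (uint16_t)arg) {
        return (uint64_t)arg;              /* ORI ret, zero, arg (zero-ext) */
    }
    if (arg == (int32_t)arg) {
        r = (uint64_t)arg & ~(uint64_t)0xffff;   /* LUI ret, arg >> 16 */
    } else {
        r = movi64(arg >> 31 >> 1);        /* high 32 bits, recursively */
        if (arg & 0xffff0000ull) {
            r <<= 16;                              /* DSLL ret, ret, 16 */
            r |= (uint64_t)(arg >> 16) & 0xffff;   /* ORI  ret, ret, ... */
            r <<= 16;                              /* DSLL ret, ret, 16 */
        } else {
            r <<= 32;                              /* DSLL ret, ret, 32 */
        }
    }
    if (arg & 0xffff) {
        r |= (uint64_t)arg & 0xffff;       /* ORI ret, ret, arg & 0xffff */
    }
    return r;
}

int main(void)
{
    const int64_t samples[] = {
        0x12345678, -0x10000, (int64_t)0x123456789abcdef0ull,
        0x7fff0000ffff0000
    };
    for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        assert(movi64(samples[i]) == (uint64_t)samples[i]);
    }
    printf("movi64 decomposition ok\n");
    return 0;
}
```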
@@ -513,29 +628,49 @@ static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
}
}
-static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
+static void tcg_out_bswap_subr(TCGContext *s, tcg_insn_unit *sub)
+{
+ bool ok = tcg_out_opc_jmp(s, OPC_JAL, sub);
+ tcg_debug_assert(ok);
+}
+
+static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
{
if (use_mips32r2_instructions) {
tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
} else {
- /* ret and arg must be different and can't be register at */
- if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) {
- tcg_abort();
- }
-
- tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
-
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+ tcg_out_bswap_subr(s, bswap32_addr);
+ /* delay slot -- never omit the insn, like tcg_out_mov might. */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3);
+ }
+}
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00);
- tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+static void tcg_out_bswap32u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg);
+ tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret);
+ tcg_out_dsrl(s, ret, ret, 32);
+ } else {
+ tcg_out_bswap_subr(s, bswap32u_addr);
+ /* delay slot -- never omit the insn, like tcg_out_mov might. */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3);
+ }
+}
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+static void tcg_out_bswap64(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg);
+ tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret);
+ } else {
+ tcg_out_bswap_subr(s, bswap64_addr);
+ /* delay slot -- never omit the insn, like tcg_out_mov might. */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3);
}
}
@@ -559,6 +694,16 @@ static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
}
}
+static inline void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_bf(s, OPC_DEXT, ret, arg, 31, 0);
+ } else {
+ tcg_out_dsll(s, ret, arg, 32);
+ tcg_out_dsrl(s, ret, ret, 32);
+ }
+}
+
static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data,
TCGReg addr, intptr_t ofs)
{
@@ -566,7 +711,7 @@ static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data,
if (ofs != lo) {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo);
if (addr != TCG_REG_ZERO) {
- tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, addr);
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP0, TCG_TMP0, addr);
}
addr = TCG_TMP0;
}
@@ -576,13 +721,21 @@ static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data,
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, intptr_t arg2)
{
- tcg_out_ldst(s, OPC_LW, arg, arg1, arg2);
+ MIPSInsn opc = OPC_LD;
+ if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32) {
+ opc = OPC_LW;
+ }
+ tcg_out_ldst(s, opc, arg, arg1, arg2);
}
static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, intptr_t arg2)
{
- tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
+ MIPSInsn opc = OPC_SD;
+ if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32) {
+ opc = OPC_SW;
+ }
+ tcg_out_ldst(s, opc, arg, arg1, arg2);
}
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
@@ -595,16 +748,6 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
return false;
}
-static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val)
-{
- if (val == (int16_t)val) {
- tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val);
- } else {
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, val);
- tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_TMP0);
- }
-}
-
static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al,
TCGReg ah, TCGArg bl, TCGArg bh, bool cbl,
bool cbh, bool is_sub)
@@ -969,6 +1112,10 @@ static void * const qemu_ld_helpers[16] = {
[MO_BESW] = helper_be_ldsw_mmu,
[MO_BEUL] = helper_be_ldul_mmu,
[MO_BEQ] = helper_be_ldq_mmu,
+#if TCG_TARGET_REG_BITS == 64
+ [MO_LESL] = helper_le_ldsl_mmu,
+ [MO_BESL] = helper_be_ldsl_mmu,
+#endif
};
static void * const qemu_st_helpers[16] = {
@@ -996,6 +1143,9 @@ static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg)
if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg);
} else {
+ /* For N32 and N64, the initial offset is different. But there
+ we also have 8 argument registers, so we don't run out here. */

+ tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i);
}
return i + 1;
@@ -1037,6 +1187,7 @@ static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg)
static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
{
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
i = (i + 1) & ~1;
i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al));
i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah));
@@ -1044,7 +1195,7 @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
}
/* Perform the tlb comparison operation. The complete host address is
- placed in BASE. Clobbers AT, T0, A0. */
+ placed in BASE. Clobbers TMP0, TMP1, TMP2, A0. */
static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
TCGReg addrh, TCGMemOpIdx oi,
tcg_insn_unit *label_ptr[2], bool is_load)
@@ -1052,6 +1203,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
TCGMemOp opc = get_memop(oi);
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
+ target_ulong mask;
int mem_index = get_mmuidx(oi);
int cmp_off
= (is_load
@@ -1059,11 +1211,11 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
: offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
- tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl,
+ tcg_out_opc_sa(s, ALIAS_TSRL, TCG_REG_A0, addrl,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0,
(CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
- tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
/* Compensate for very large offsets. */
if (add_off >= 0x8000) {
@@ -1073,51 +1225,63 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
tlb_table[NB_MMU_MODES - 1][1])
> 0x7ff0 + 0x7fff);
- tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0);
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, TCG_REG_A0, 0x7ff0);
cmp_off -= 0x7ff0;
add_off -= 0x7ff0;
}
- /* Load the (low half) tlb comparator. */
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0,
- cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0));
-
/* We don't currently support unaligned accesses.
We could do so with mips32r6. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
- /* Mask the page bits, keeping the alignment bits to compare against.
- In between on 32-bit targets, load the tlb addend for the fast path. */
- tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1,
- TARGET_PAGE_MASK | ((1 << a_bits) - 1));
- if (TARGET_LONG_BITS == 32) {
- tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
+
+ mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
+
+ /* Load the (low half) tlb comparator. Mask the page bits, keeping the
+ alignment bits to compare against. */
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A0, cmp_off + LO_OFF);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, mask);
+ } else {
+ tcg_out_ldst(s,
+ (TARGET_LONG_BITS == 64 ? OPC_LD
+ : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
+ TCG_TMP0, TCG_REG_A0, cmp_off);
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, mask);
+ /* No second compare is required here;
+ load the tlb addend for the fast path. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_REG_A0, add_off);
}
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, base, addrl);
+ addrl = base;
+ }
+
label_ptr[0] = s->code_ptr;
tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
/* Load and test the high half tlb comparator. */
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
/* delay slot */
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF);
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF);
- /* Load the tlb addend for the fast path. We can't do it earlier with
- 64-bit targets or we'll clobber a0 before reading the high half tlb
- comparator. */
- tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
+ /* Load the tlb addend for the fast path. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_REG_A0, add_off);
label_ptr[1] = s->code_ptr;
tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
}
/* delay slot */
- tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl);
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl);
}
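
The reworked TLB probe now keeps the addend in TCG_TMP2 so that A0 stays live for the high-half comparator on 32-bit hosts. The index arithmetic at the top of the function -- page-shift, mask to the TLB size, scale to the entry stride -- looks like this in plain C (the TARGET_PAGE_BITS/CPU_TLB_* values below are illustrative assumptions, not the real configuration):

```c
#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS   12    /* assumed */
#define CPU_TLB_ENTRY_BITS 5     /* assumed */
#define CPU_TLB_SIZE       256   /* assumed */

/* Mirrors the SRL/ANDI pair emitted at the top of tcg_out_tlb_load. */
static uint32_t tlb_entry_offset(uint64_t addr)
{
    uint32_t a0 = (uint32_t)(addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
    return a0 & ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
}

int main(void)
{
    printf("%#x\n", tlb_entry_offset(0x7fff12345678ull));
    return 0;
}
```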
static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
+ TCGType ext,
TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
void *raddr, tcg_insn_unit *label_ptr[2])
@@ -1126,13 +1290,14 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
label->is_ld = is_ld;
label->oi = oi;
+ label->type = ext;
label->datalo_reg = datalo;
label->datahi_reg = datahi;
label->addrlo_reg = addrlo;
label->addrhi_reg = addrhi;
label->raddr = raddr;
label->label_ptr[0] = label_ptr[0];
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
label->label_ptr[1] = label_ptr[1];
}
}
@@ -1146,12 +1311,12 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
/* resolve label address */
reloc_pc16(l->label_ptr[0], s->code_ptr);
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
reloc_pc16(l->label_ptr[1], s->code_ptr);
}
i = 1;
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
} else {
i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
@@ -1163,7 +1328,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
v0 = l->datalo_reg;
- if ((opc & MO_SIZE) == MO_64) {
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
/* We eliminated V0 from the possible output registers, so it
cannot be clobbered here. So we must move V1 first. */
if (MIPS_BE) {
@@ -1177,7 +1342,12 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
reloc_pc16(s->code_ptr, l->raddr);
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
/* delay slot */
- tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0);
+ if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) {
+ /* we always sign-extend 32-bit loads */
+ tcg_out_opc_sa(s, OPC_SLL, v0, TCG_REG_V0, 0);
+ } else {
+ tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO);
+ }
}
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
@@ -1189,12 +1359,12 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
/* resolve label address */
reloc_pc16(l->label_ptr[0], s->code_ptr);
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
reloc_pc16(l->label_ptr[1], s->code_ptr);
}
i = 1;
- if (TARGET_LONG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
} else {
i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
@@ -1210,7 +1380,11 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
break;
case MO_64:
- i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
+ if (TCG_TARGET_REG_BITS == 32) {
+ i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
+ } else {
+ i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
+ }
break;
default:
tcg_abort();
@@ -1227,46 +1401,104 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
}
#endif
-static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
- TCGReg base, TCGMemOp opc)
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
+ TCGReg base, TCGMemOp opc, bool is_64)
{
switch (opc & (MO_SSIZE | MO_BSWAP)) {
case MO_UB:
- tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
break;
case MO_SB:
- tcg_out_opc_imm(s, OPC_LB, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
break;
case MO_UW | MO_BSWAP:
tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
- tcg_out_bswap16(s, datalo, TCG_TMP1);
+ tcg_out_bswap16(s, lo, TCG_TMP1);
break;
case MO_UW:
- tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
break;
case MO_SW | MO_BSWAP:
tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
- tcg_out_bswap16s(s, datalo, TCG_TMP1);
+ tcg_out_bswap16s(s, lo, TCG_TMP1);
break;
case MO_SW:
- tcg_out_opc_imm(s, OPC_LH, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
break;
case MO_UL | MO_BSWAP:
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0);
- tcg_out_bswap32(s, datalo, TCG_TMP1);
+ if (TCG_TARGET_REG_BITS == 64 && is_64) {
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+ tcg_out_bswap32u(s, lo, lo);
+ } else {
+ tcg_out_bswap_subr(s, bswap32u_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LWU, TCG_TMP0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
+ }
+ break;
+ }
+ /* FALLTHRU */
+ case MO_SL | MO_BSWAP:
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+ tcg_out_bswap32(s, lo, lo);
+ } else {
+ tcg_out_bswap_subr(s, bswap32_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_TMP3);
+ }
break;
case MO_UL:
- tcg_out_opc_imm(s, OPC_LW, datalo, base, 0);
+ if (TCG_TARGET_REG_BITS == 64 && is_64) {
+ tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+ break;
+ }
+ /* FALLTHRU */
+ case MO_SL:
+ tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
break;
case MO_Q | MO_BSWAP:
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF);
- tcg_out_bswap32(s, datalo, TCG_TMP1);
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF);
- tcg_out_bswap32(s, datahi, TCG_TMP1);
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+ tcg_out_bswap64(s, lo, lo);
+ } else {
+ tcg_out_bswap_subr(s, bswap64_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LD, TCG_TMP0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
+ }
+ } else if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
+ tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
+ tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
+ } else {
+ tcg_out_bswap_subr(s, bswap32_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 4);
+ tcg_out_bswap_subr(s, bswap32_addr);
+ /* delay slot */
+ tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
+ tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
+ }
break;
case MO_Q:
- tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF);
- tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF);
+ /* Prefer to load from offset 0 first, but allow for overlap. */
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+ } else if (MIPS_BE ? hi != base : lo == base) {
+ tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF);
+ tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF);
+ } else {
+ tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF);
+ tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF);
+ }
break;
default:
tcg_abort();
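
The MO_Q comment about overlap is worth spelling out: on a 32-bit host the 64-bit result occupies two registers, and the allocator may have placed one half in the same register as base. Whichever half aliases base must be loaded last, or the second load would use a clobbered address. The same hazard in miniature, with an array standing in for the register file (illustrative sketch only):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum { R_LO = 0, R_HI = 1, R_BASE = 0 };   /* lo aliases base */

int main(void)
{
    uint32_t mem[2] = { 0x11111111, 0x22222222 };
    uintptr_t regs[2];

    regs[R_BASE] = (uintptr_t)mem;

    /* hi first: base is still valid for the second load.  Loading lo
       first would overwrite regs[R_BASE] and break the hi load. */
    regs[R_HI] = *(uint32_t *)(regs[R_BASE] + 4);
    regs[R_LO] = *(uint32_t *)(regs[R_BASE] + 0);

    assert(regs[R_LO] == 0x11111111 && regs[R_HI] == 0x22222222);
    printf("overlap-safe load order ok\n");
    return 0;
}
```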
@@ -1282,69 +1514,94 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
#if defined(CONFIG_SOFTMMU)
tcg_insn_unit *label_ptr[2];
#endif
- /* Note that we've eliminated V0 from the output registers,
- so we won't overwrite the base register during loading. */
- TCGReg base = TCG_REG_V0;
+ TCGReg base = TCG_REG_A0;
data_regl = *args++;
- data_regh = (is_64 ? *args++ : 0);
+ data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
addr_regl = *args++;
- addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+ addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
oi = *args++;
opc = get_memop(oi);
#if defined(CONFIG_SOFTMMU)
tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1);
- tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc);
- add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh,
+ tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+ add_qemu_ldst_label(s, 1, oi,
+ (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ data_regl, data_regh, addr_regl, addr_regh,
s->code_ptr, label_ptr);
#else
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, base, addr_regl);
+ addr_regl = base;
+ }
if (guest_base == 0 && data_regl != addr_regl) {
base = addr_regl;
} else if (guest_base == (int16_t)guest_base) {
- tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base);
+ tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base);
} else {
tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base);
- tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl);
+ tcg_out_opc_reg(s, ALIAS_PADD, base, base, addr_regl);
}
- tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc);
+ tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
#endif
}
-static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
TCGReg base, TCGMemOp opc)
{
+ /* Don't clutter the code below with checks to avoid bswapping ZERO. */
+ if ((lo | hi) == 0) {
+ opc &= ~MO_BSWAP;
+ }
+
switch (opc & (MO_SIZE | MO_BSWAP)) {
case MO_8:
- tcg_out_opc_imm(s, OPC_SB, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
break;
case MO_16 | MO_BSWAP:
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff);
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, lo, 0xffff);
tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1);
- datalo = TCG_TMP1;
+ lo = TCG_TMP1;
/* FALLTHRU */
case MO_16:
- tcg_out_opc_imm(s, OPC_SH, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_SH, lo, base, 0);
break;
case MO_32 | MO_BSWAP:
- tcg_out_bswap32(s, TCG_TMP1, datalo);
- datalo = TCG_TMP1;
+ tcg_out_bswap32(s, TCG_TMP3, lo);
+ lo = TCG_TMP3;
/* FALLTHRU */
case MO_32:
- tcg_out_opc_imm(s, OPC_SW, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_SW, lo, base, 0);
break;
case MO_64 | MO_BSWAP:
- tcg_out_bswap32(s, TCG_TMP1, datalo);
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, HI_OFF);
- tcg_out_bswap32(s, TCG_TMP1, datahi);
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, LO_OFF);
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_bswap64(s, TCG_TMP3, lo);
+ tcg_out_opc_imm(s, OPC_SD, TCG_TMP3, base, 0);
+ } else if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo);
+ tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
+ tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4);
+ } else {
+ tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0);
+ tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4);
+ }
break;
case MO_64:
- tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF);
- tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF);
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_opc_imm(s, OPC_SD, lo, base, 0);
+ } else {
+ tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? hi : lo, base, 0);
+ tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4);
+ }
break;
default:
@@ -1355,39 +1612,41 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
{
TCGReg addr_regl, addr_regh __attribute__((unused));
- TCGReg data_regl, data_regh, base;
+ TCGReg data_regl, data_regh;
TCGMemOpIdx oi;
TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
tcg_insn_unit *label_ptr[2];
#endif
+ TCGReg base = TCG_REG_A0;
data_regl = *args++;
- data_regh = (is_64 ? *args++ : 0);
+ data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
addr_regl = *args++;
- addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+ addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
oi = *args++;
opc = get_memop(oi);
#if defined(CONFIG_SOFTMMU)
- /* Note that we eliminated the helper's address argument,
- so we can reuse that for the base. */
- base = (TARGET_LONG_BITS == 32 ? TCG_REG_A1 : TCG_REG_A2);
tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0);
tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
- add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh,
+ add_qemu_ldst_label(s, 0, oi,
+ (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ data_regl, data_regh, addr_regl, addr_regh,
s->code_ptr, label_ptr);
#else
+ base = TCG_REG_A0;
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, base, addr_regl);
+ addr_regl = base;
+ }
if (guest_base == 0) {
base = addr_regl;
+ } else if (guest_base == (int16_t)guest_base) {
+ tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base);
} else {
- base = TCG_REG_A0;
- if (guest_base == (int16_t)guest_base) {
- tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base);
- } else {
- tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base);
- tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl);
- }
+ tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base);
+ tcg_out_opc_reg(s, ALIAS_PADD, base, base, addr_regl);
}
tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
#endif
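
The user-only (non-SOFTMMU) store path now mirrors the load path: when guest_base is zero the guest address register is used directly, when it fits a signed 16-bit immediate one ADDIU/DADDIU suffices, and otherwise it is materialized with tcg_out_movi and added. A sketch of the instruction-count trade-off (counts are approximate; movi can take up to about six insns for a worst-case 64-bit base):

```c
#include <stdint.h>
#include <stdio.h>

/* Rough insn count for adding guest_base to a guest address, following
   the three cases in tcg_out_qemu_st() above.  Approximation only. */
static int addr_setup_insns(int64_t guest_base)
{
    if (guest_base == 0) {
        return 0;                   /* reuse addr_regl as base */
    }
    if (guest_base == (int16_t)guest_base) {
        return 1;                   /* ALIAS_PADDI base, addr, guest_base */
    }
    return 6 + 1;                   /* worst-case movi + ALIAS_PADD */
}

int main(void)
{
    printf("%d %d %d\n",
           addr_setup_insns(0),
           addr_setup_insns(0x4000),
           addr_setup_insns(0x7f0000000000));
    return 0;
}
```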
@@ -1426,6 +1685,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
{
TCGReg b0 = TCG_REG_ZERO;
+ a0 = (intptr_t)a0;
if (a0 & ~0xffff) {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff);
b0 = TCG_REG_V0;
@@ -1459,28 +1719,45 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
i1 = OPC_LBU;
goto do_ldst;
case INDEX_op_ld8s_i32:
+ case INDEX_op_ld8s_i64:
i1 = OPC_LB;
goto do_ldst;
case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
i1 = OPC_LHU;
goto do_ldst;
case INDEX_op_ld16s_i32:
+ case INDEX_op_ld16s_i64:
i1 = OPC_LH;
goto do_ldst;
case INDEX_op_ld_i32:
+ case INDEX_op_ld32s_i64:
i1 = OPC_LW;
goto do_ldst;
+ case INDEX_op_ld32u_i64:
+ i1 = OPC_LWU;
+ goto do_ldst;
+ case INDEX_op_ld_i64:
+ i1 = OPC_LD;
+ goto do_ldst;
case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
i1 = OPC_SB;
goto do_ldst;
case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
i1 = OPC_SH;
goto do_ldst;
case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
i1 = OPC_SW;
+ goto do_ldst;
+ case INDEX_op_st_i64:
+ i1 = OPC_SD;
do_ldst:
tcg_out_ldst(s, i1, a0, a1, a2);
break;
@@ -1488,10 +1765,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_add_i32:
i1 = OPC_ADDU, i2 = OPC_ADDIU;
goto do_binary;
+ case INDEX_op_add_i64:
+ i1 = OPC_DADDU, i2 = OPC_DADDIU;
+ goto do_binary;
case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
i1 = OPC_OR, i2 = OPC_ORI;
goto do_binary;
case INDEX_op_xor_i32:
+ case INDEX_op_xor_i64:
i1 = OPC_XOR, i2 = OPC_XORI;
do_binary:
if (c2) {
@@ -1503,12 +1785,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_sub_i32:
+ i1 = OPC_SUBU, i2 = OPC_ADDIU;
+ goto do_subtract;
+ case INDEX_op_sub_i64:
+ i1 = OPC_DSUBU, i2 = OPC_DADDIU;
+ do_subtract:
if (c2) {
- tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2);
+ tcg_out_opc_imm(s, i2, a0, a1, -a2);
break;
}
- i1 = OPC_SUBU;
- goto do_binary;
+ goto do_binaryv;
case INDEX_op_and_i32:
if (c2 && a2 != (uint16_t)a2) {
int msb = ctz32(~a2) - 1;
@@ -1519,7 +1805,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
i1 = OPC_AND, i2 = OPC_ANDI;
goto do_binary;
+ case INDEX_op_and_i64:
+ if (c2 && a2 != (uint16_t)a2) {
+ int msb = ctz64(~a2) - 1;
+ tcg_debug_assert(use_mips32r2_instructions);
+ tcg_debug_assert(is_p2m1(a2));
+ tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, a0, a1, msb, 0);
+ break;
+ }
+ i1 = OPC_AND, i2 = OPC_ANDI;
+ goto do_binary;
case INDEX_op_nor_i32:
+ case INDEX_op_nor_i64:
i1 = OPC_NOR;
goto do_binaryv;
@@ -1571,6 +1868,55 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
}
i1 = OPC_DIVU, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_mul_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMUL, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DMULT, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_mulsh_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMUH, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DMULT, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_muluh_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMUHU, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DMULTU, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_div_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DDIV_R6, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIV, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_divu_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DDIVU_R6, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIVU, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_rem_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMOD, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIV, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_remu_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMODU, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIVU, i2 = OPC_MFHI;
do_hilo1:
tcg_out_opc_reg(s, i1, 0, a1, a2);
tcg_out_opc_reg(s, i2, a0, 0, 0);
@@ -1581,6 +1927,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
goto do_hilo2;
case INDEX_op_mulu2_i32:
i1 = OPC_MULTU;
+ goto do_hilo2;
+ case INDEX_op_muls2_i64:
+ i1 = OPC_DMULT;
+ goto do_hilo2;
+ case INDEX_op_mulu2_i64:
+ i1 = OPC_DMULTU;
do_hilo2:
tcg_out_opc_reg(s, i1, 0, a2, args[3]);
tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0);
@@ -1588,20 +1940,46 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
i1 = OPC_NOR;
goto do_unary;
case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
i1 = OPC_WSBH;
goto do_unary;
case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
i1 = OPC_SEB;
goto do_unary;
case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
i1 = OPC_SEH;
do_unary:
tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1);
break;
+ case INDEX_op_bswap32_i32:
+ tcg_out_bswap32(s, a0, a1);
+ break;
+ case INDEX_op_bswap32_i64:
+ tcg_out_bswap32u(s, a0, a1);
+ break;
+ case INDEX_op_bswap64_i64:
+ tcg_out_bswap64(s, a0, a1);
+ break;
+ case INDEX_op_extrh_i64_i32:
+ tcg_out_dsra(s, a0, a1, 32);
+ break;
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extrl_i64_i32:
+ tcg_out_opc_sa(s, OPC_SLL, a0, a1, 0);
+ break;
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_extu_i32_i64:
+ tcg_out_ext32u(s, a0, a1);
+ break;
+
case INDEX_op_sar_i32:
i1 = OPC_SRAV, i2 = OPC_SRA;
goto do_shift;
@@ -1616,9 +1994,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
do_shift:
if (c2) {
tcg_out_opc_sa(s, i2, a0, a1, a2);
- } else {
- tcg_out_opc_reg(s, i1, a0, a2, a1);
+ break;
}
+ do_shiftv:
+ tcg_out_opc_reg(s, i1, a0, a2, a1);
break;
case INDEX_op_rotl_i32:
if (c2) {
@@ -1628,17 +2007,53 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1);
}
break;
-
- case INDEX_op_bswap32_i32:
- tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1);
- tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16);
+ case INDEX_op_sar_i64:
+ if (c2) {
+ tcg_out_dsra(s, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DSRAV;
+ goto do_shiftv;
+ case INDEX_op_shl_i64:
+ if (c2) {
+ tcg_out_dsll(s, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DSLLV;
+ goto do_shiftv;
+ case INDEX_op_shr_i64:
+ if (c2) {
+ tcg_out_dsrl(s, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DSRLV;
+ goto do_shiftv;
+ case INDEX_op_rotr_i64:
+ if (c2) {
+ tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DROTRV;
+ goto do_shiftv;
+ case INDEX_op_rotl_i64:
+ if (c2) {
+ tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, 64 - a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_DSUBU, TCG_TMP0, TCG_REG_ZERO, a2);
+ tcg_out_opc_reg(s, OPC_DROTRV, a0, TCG_TMP0, a1);
+ }
break;
case INDEX_op_deposit_i32:
tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]);
break;
+ case INDEX_op_deposit_i64:
+ tcg_out_opc_bf64(s, OPC_DINS, OPC_DINSM, OPC_DINSU, a0, a2,
+ args[3] + args[4] - 1, args[3]);
+ break;
case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
break;
case INDEX_op_brcond2_i32:
@@ -1646,10 +2061,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_movcond_i32:
+ case INDEX_op_movcond_i64:
tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]);
break;
case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
tcg_out_setcond(s, args[3], a0, a1, a2);
break;
case INDEX_op_setcond2_i32:
@@ -1682,7 +2099,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_mb(s, a0);
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_movi_i64:
case INDEX_op_call: /* Always emitted via tcg_out_call. */
default:
tcg_abort();
@@ -1744,21 +2163,89 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } },
#endif
{ INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
- { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
+#if TCG_TARGET_REG_BITS == 32
{ INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } },
{ INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } },
+ { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
{ INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } },
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
+ { INDEX_op_ld8u_i64, { "r", "r" } },
+ { INDEX_op_ld8s_i64, { "r", "r" } },
+ { INDEX_op_ld16u_i64, { "r", "r" } },
+ { INDEX_op_ld16s_i64, { "r", "r" } },
+ { INDEX_op_ld32s_i64, { "r", "r" } },
+ { INDEX_op_ld32u_i64, { "r", "r" } },
+ { INDEX_op_ld_i64, { "r", "r" } },
+ { INDEX_op_st8_i64, { "rZ", "r" } },
+ { INDEX_op_st16_i64, { "rZ", "r" } },
+ { INDEX_op_st32_i64, { "rZ", "r" } },
+ { INDEX_op_st_i64, { "rZ", "r" } },
+
+ { INDEX_op_add_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_mul_i64, { "r", "rZ", "rZ" } },
+#if !use_mips32r6_instructions
+ { INDEX_op_muls2_i64, { "r", "r", "rZ", "rZ" } },
+ { INDEX_op_mulu2_i64, { "r", "r", "rZ", "rZ" } },
+#endif
+ { INDEX_op_mulsh_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_muluh_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_div_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_divu_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_rem_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_remu_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_sub_i64, { "r", "rZ", "rN" } },
+
+ { INDEX_op_and_i64, { "r", "rZ", "rIK" } },
+ { INDEX_op_nor_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_not_i64, { "r", "rZ" } },
+ { INDEX_op_or_i64, { "r", "rZ", "rI" } },
+ { INDEX_op_xor_i64, { "r", "rZ", "rI" } },
+
+ { INDEX_op_shl_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_shr_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_sar_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_rotr_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_rotl_i64, { "r", "rZ", "ri" } },
+
+ { INDEX_op_bswap16_i64, { "r", "r" } },
+ { INDEX_op_bswap32_i64, { "r", "r" } },
+ { INDEX_op_bswap64_i64, { "r", "r" } },
+
+ { INDEX_op_ext8s_i64, { "r", "rZ" } },
+ { INDEX_op_ext16s_i64, { "r", "rZ" } },
+ { INDEX_op_ext32s_i64, { "r", "rZ" } },
+ { INDEX_op_ext32u_i64, { "r", "rZ" } },
+ { INDEX_op_ext_i32_i64, { "r", "rZ" } },
+ { INDEX_op_extu_i32_i64, { "r", "rZ" } },
+ { INDEX_op_extrl_i64_i32, { "r", "rZ" } },
+ { INDEX_op_extrh_i64_i32, { "r", "rZ" } },
+
+ { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
+
+ { INDEX_op_brcond_i64, { "rZ", "rZ" } },
+#if use_mips32r6_instructions
+ { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rZ", "rZ" } },
+#else
+ { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rZ", "0" } },
+#endif
+ { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } },
-#if TARGET_LONG_BITS == 32
- { INDEX_op_qemu_ld_i32, { "L", "lZ" } },
+ { INDEX_op_qemu_ld_i32, { "r", "LZ" } },
+ { INDEX_op_qemu_st_i32, { "SZ", "SZ" } },
+ { INDEX_op_qemu_ld_i64, { "r", "LZ" } },
+ { INDEX_op_qemu_st_i64, { "SZ", "SZ" } },
+#elif TARGET_LONG_BITS == 32
+ { INDEX_op_qemu_ld_i32, { "r", "LZ" } },
{ INDEX_op_qemu_st_i32, { "SZ", "SZ" } },
- { INDEX_op_qemu_ld_i64, { "L", "L", "lZ" } },
+ { INDEX_op_qemu_ld_i64, { "r", "r", "LZ" } },
{ INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ" } },
#else
- { INDEX_op_qemu_ld_i32, { "L", "lZ", "lZ" } },
+ { INDEX_op_qemu_ld_i32, { "r", "LZ", "LZ" } },
{ INDEX_op_qemu_st_i32, { "SZ", "SZ", "SZ" } },
- { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } },
+ { INDEX_op_qemu_ld_i64, { "r", "r", "LZ", "LZ" } },
{ INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } },
#endif
@@ -1858,47 +2345,186 @@ static void tcg_target_detect_isa(void)
sigaction(SIGILL, &sa_old, NULL);
}
+static tcg_insn_unit *align_code_ptr(TCGContext *s)
+{
+ uintptr_t p = (uintptr_t)s->code_ptr;
+ if (p & 15) {
+ p = (p + 15) & -16;
+ s->code_ptr = (void *)p;
+ }
+ return s->code_ptr;
+}
+
+/* Stack frame parameters. */
+#define REG_SIZE (TCG_TARGET_REG_BITS / 8)
+#define SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
+#define TEMP_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
+
+#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & -TCG_TARGET_STACK_ALIGN)
+#define SAVE_OFS (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
+
+/* We're expecting to be able to use an immediate for frame allocation. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7fff);
+
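
With the frame macros above, the n64 numbers work out as follows. TCG_STATIC_CALL_ARGS_SIZE and CPU_TEMP_BUF_NLONGS come from common TCG code; 128 is assumed for both here, as is a 10-entry callee-save list (s0-s8 plus ra) -- adjust if the real build differs:

```c
#include <stdio.h>

#define REG_SIZE              8      /* TCG_TARGET_REG_BITS / 8 on n64 */
#define NUM_CALLEE_SAVED      10     /* assumed: s0-s8 + ra */
#define STATIC_CALL_ARGS_SIZE 128    /* assumed value */
#define TEMP_BUF_NLONGS       128    /* assumed value */
#define STACK_ALIGN           16

int main(void)
{
    int save_size = NUM_CALLEE_SAVED * REG_SIZE;
    int temp_size = TEMP_BUF_NLONGS * (int)sizeof(long);
    int frame_size = (STATIC_CALL_ARGS_SIZE + temp_size + save_size
                      + STACK_ALIGN - 1) & -STACK_ALIGN;

    /* With 8-byte long: 128 + 1024 + 80 -> 1232, comfortably below the
       0x7fff immediate limit checked by QEMU_BUILD_BUG_ON above. */
    printf("SAVE_OFS = %d, FRAME_SIZE = %d\n",
           STATIC_CALL_ARGS_SIZE + temp_size, frame_size);
    return 0;
}
```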
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
- int i, frame_size;
+ int i;
- /* reserve some stack space, also for TCG temps. */
- frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4
- + TCG_STATIC_CALL_ARGS_SIZE
- + CPU_TEMP_BUF_NLONGS * sizeof(long);
- frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
- ~(TCG_TARGET_STACK_ALIGN - 1);
- tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4
- + TCG_STATIC_CALL_ARGS_SIZE,
- CPU_TEMP_BUF_NLONGS * sizeof(long));
+ tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
/* TB prologue */
- tcg_out_addi(s, TCG_REG_SP, -frame_size);
- for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) {
- tcg_out_st(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i],
- TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4);
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+ tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
}
/* Call generated code */
tcg_out_opc_reg(s, OPC_JR, 0, tcg_target_call_iarg_regs[1], 0);
+ /* delay slot */
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
- tb_ret_addr = s->code_ptr;
/* TB epilogue */
- for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) {
- tcg_out_ld(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i],
- TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4);
+ tb_ret_addr = s->code_ptr;
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+ tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+ }
+
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* delay slot */
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
+
+ if (use_mips32r2_instructions) {
+ return;
+ }
+
+ /* Bswap subroutines: Input in TCG_TMP0, output in TCG_TMP3;
+ clobbers TCG_TMP1, TCG_TMP2. */
+
+ /*
+ * bswap32 -- 32-bit swap (signed result for mips64). a0 = abcd.
+ */
+ bswap32_addr = align_code_ptr(s);
+ /* t3 = (ssss)d000 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP3, TCG_TMP0, 24);
+ /* t1 = 000a */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_TMP0, 24);
+ /* t2 = 00c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP0, 0xff00);
+ /* t3 = d00a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 0abc */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_TMP0, 8);
+ /* t2 = 0c00 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP2, TCG_TMP2, 8);
+ /* t1 = 00b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* t3 = dc0a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* t3 = dcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ return;
}
+ /*
+ * bswap32u -- unsigned 32-bit swap. a0 = ....abcd.
+ */
+ bswap32u_addr = align_code_ptr(s);
+ /* t1 = (0000)000d */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff);
+ /* t3 = 000a */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, TCG_TMP0, 24);
+ /* t1 = (0000)d000 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
+ /* t2 = 00c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP0, 0xff00);
+ /* t3 = d00a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 0abc */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_TMP0, 8);
+ /* t2 = 0c00 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP2, TCG_TMP2, 8);
+ /* t1 = 00b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* t3 = dc0a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* t3 = dcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+
+ /*
+ * bswap64 -- 64-bit swap. a0 = abcdefgh
+ */
+ bswap64_addr = align_code_ptr(s);
+ /* t3 = h0000000 */
+ tcg_out_dsll(s, TCG_TMP3, TCG_TMP0, 56);
+ /* t1 = 0000000a */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_TMP0, 56);
+
+ /* t2 = 000000g0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP0, 0xff00);
+ /* t3 = h000000a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 00000abc */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_TMP0, 40);
+ /* t2 = 0g000000 */
+ tcg_out_dsll(s, TCG_TMP2, TCG_TMP2, 40);
+ /* t1 = 000000b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+
+ /* t3 = hg00000a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+ /* t2 = 0000abcd */
+ tcg_out_dsrl(s, TCG_TMP2, TCG_TMP0, 32);
+ /* t3 = hg0000ba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+
+ /* t1 = 000000c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP2, 0xff00);
+ /* t2 = 0000000d */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP2, 0x00ff);
+ /* t1 = 00000c00 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 8);
+ /* t2 = 0000d000 */
+ tcg_out_dsll(s, TCG_TMP2, TCG_TMP2, 24);
+
+ /* t3 = hg000cba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 00abcdef */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_TMP0, 16);
+ /* t3 = hg00dcba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+
+ /* t2 = 0000000f */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP1, 0x00ff);
+ /* t1 = 000000e0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* t2 = 00f00000 */
+ tcg_out_dsll(s, TCG_TMP2, TCG_TMP2, 40);
+ /* t1 = 000e0000 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
+
+ /* t3 = hgf0dcba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
- tcg_out_addi(s, TCG_REG_SP, frame_size);
+ /* t3 = hgfedcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
}
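
The three out-of-line helpers share one calling convention: the JAL's delay slot copies the operand into TCG_TMP0, the result comes back in TCG_TMP3, and TCG_TMP1/TCG_TMP2 are scratch -- which is why all four temporaries are reserved from the allocator below. The bswap64 byte shuffle is easy to mistype, so here is the same sequence transcribed into plain C and checked against the compiler builtin (a verification sketch, not QEMU code):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* The emitted bswap64 sequence above, one insn per line
   (t0 = TCG_TMP0 input, t3 = TCG_TMP3 result, t1/t2 scratch). */
static uint64_t bswap64_ref(uint64_t t0)
{
    uint64_t t1, t2, t3;

    t3 = t0 << 56;          /* h0000000 */
    t1 = t0 >> 56;          /* 0000000a */
    t2 = t0 & 0xff00;       /* 000000g0 */
    t3 |= t1;               /* h000000a */
    t1 = t0 >> 40;          /* 00000abc */
    t2 <<= 40;              /* 0g000000 */
    t1 &= 0xff00;           /* 000000b0 */
    t3 |= t2;               /* hg00000a */
    t2 = t0 >> 32;          /* 0000abcd */
    t3 |= t1;               /* hg0000ba */
    t1 = t2 & 0xff00;       /* 000000c0 */
    t2 &= 0x00ff;           /* 0000000d */
    t1 <<= 8;               /* 00000c00 */
    t2 <<= 24;              /* 0000d000 */
    t3 |= t1;               /* hg000cba */
    t1 = t0 >> 16;          /* 00abcdef */
    t3 |= t2;               /* hg00dcba */
    t2 = t1 & 0x00ff;       /* 0000000f */
    t1 &= 0xff00;           /* 000000e0 */
    t2 <<= 40;              /* 00f00000 */
    t1 <<= 24;              /* 000e0000 */
    t3 |= t2;               /* hgf0dcba */
    t3 |= t1;               /* hgfedcba */
    return t3;
}

int main(void)
{
    uint64_t x = 0x0102030405060708ull;
    assert(bswap64_ref(x) == __builtin_bswap64(x));  /* gcc/clang builtin */
    printf("%016llx\n", (unsigned long long)bswap64_ref(x));
    return 0;
}
```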
static void tcg_target_init(TCGContext *s)
{
tcg_target_detect_isa();
tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], 0xffffffff);
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I64], 0xffffffff);
+ }
tcg_regset_set(tcg_target_call_clobber_regs,
(1 << TCG_REG_V0) |
(1 << TCG_REG_V1) |
@@ -1923,6 +2549,8 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1); /* kernel use only */
tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); /* internal use */
tcg_regset_set_reg(s->reserved_regs, TCG_TMP1); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP2); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP3); /* internal use */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */
@@ -1935,3 +2563,47 @@ void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
atomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2));
flush_icache_range(jmp_addr, jmp_addr + 4);
}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_MIPS
+/* GDB doesn't appear to require proper setting of ELF_HOST_FLAGS,
+ which is good because they're really quite complicated for MIPS. */
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
+ .h.cie.return_column = TCG_REG_RA,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x80 + 16, 9, /* DW_CFA_offset, s0, -72 */
+ 0x80 + 17, 8, /* DW_CFA_offset, s1, -64 */
+ 0x80 + 18, 7, /* DW_CFA_offset, s2, -56 */
+ 0x80 + 19, 6, /* DW_CFA_offset, s3, -48 */
+ 0x80 + 20, 5, /* DW_CFA_offset, s4, -40 */
+ 0x80 + 21, 4, /* DW_CFA_offset, s5, -32 */
+ 0x80 + 22, 3, /* DW_CFA_offset, s6, -24 */
+ 0x80 + 30, 2, /* DW_CFA_offset, s8, -16 */
+ 0x80 + 31, 1, /* DW_CFA_offset, ra, -8 */
+ }
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
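
For reference, the two FRAME_SIZE bytes in .fde_def_cfa above are its ULEB128 encoding, valid while 0x80 <= FRAME_SIZE < 0x4000: the low seven bits with the continuation bit set, then the remaining bits. A sketch of the general encoding; the helper name is hypothetical (illustration only):

    #include <stddef.h>
    #include <stdint.h>

    static size_t uleb128_encode(uint8_t *out, uint64_t value)
    {
        size_t n = 0;

        do {
            uint8_t byte = value & 0x7f;    /* low 7 bits per output byte */
            value >>= 7;
            if (value) {
                byte |= 0x80;               /* continuation: more bytes follow */
            }
            out[n++] = byte;
        } while (value);

        return n;
    }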
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 4841d582a1..f776404d86 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -689,7 +689,7 @@ tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y)
tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y)
tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y)
tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
-tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o
+tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y)
tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y)
tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o
diff --git a/tests/acpi-test-data/pc/DSDT b/tests/acpi-test-data/pc/DSDT
index 8053d71105..15c3135d65 100644
--- a/tests/acpi-test-data/pc/DSDT
+++ b/tests/acpi-test-data/pc/DSDT
Binary files differ
diff --git a/tests/acpi-test-data/pc/DSDT.bridge b/tests/acpi-test-data/pc/DSDT.bridge
index 850e71a973..d38586c95b 100644
--- a/tests/acpi-test-data/pc/DSDT.bridge
+++ b/tests/acpi-test-data/pc/DSDT.bridge
Binary files differ
diff --git a/tests/acpi-test-data/pc/DSDT.cphp b/tests/acpi-test-data/pc/DSDT.cphp
index 9f405cfd83..2dd70bf952 100644
--- a/tests/acpi-test-data/pc/DSDT.cphp
+++ b/tests/acpi-test-data/pc/DSDT.cphp
Binary files differ
diff --git a/tests/acpi-test-data/pc/DSDT.ipmikcs b/tests/acpi-test-data/pc/DSDT.ipmikcs
index 8ac48afb6a..2796d96b0e 100644
--- a/tests/acpi-test-data/pc/DSDT.ipmikcs
+++ b/tests/acpi-test-data/pc/DSDT.ipmikcs
Binary files differ
diff --git a/tests/acpi-test-data/pc/DSDT.memhp b/tests/acpi-test-data/pc/DSDT.memhp
new file mode 100644
index 0000000000..53f6d58243
--- /dev/null
+++ b/tests/acpi-test-data/pc/DSDT.memhp
Binary files differ
diff --git a/tests/acpi-test-data/pc/SRAT.memhp b/tests/acpi-test-data/pc/SRAT.memhp
new file mode 100644
index 0000000000..66ce9a8981
--- /dev/null
+++ b/tests/acpi-test-data/pc/SRAT.memhp
Binary files differ
diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT
index 58fbb3d2e2..d11567c3dc 100644
--- a/tests/acpi-test-data/q35/DSDT
+++ b/tests/acpi-test-data/q35/DSDT
Binary files differ
diff --git a/tests/acpi-test-data/q35/DSDT.bridge b/tests/acpi-test-data/q35/DSDT.bridge
index c392802a95..412a6e9104 100644
--- a/tests/acpi-test-data/q35/DSDT.bridge
+++ b/tests/acpi-test-data/q35/DSDT.bridge
Binary files differ
diff --git a/tests/acpi-test-data/q35/DSDT.cphp b/tests/acpi-test-data/q35/DSDT.cphp
index a0ce6b3264..79902d0d30 100644
--- a/tests/acpi-test-data/q35/DSDT.cphp
+++ b/tests/acpi-test-data/q35/DSDT.cphp
Binary files differ
diff --git a/tests/acpi-test-data/q35/DSDT.ipmibt b/tests/acpi-test-data/q35/DSDT.ipmibt
index 0ea38e1e72..b658329c5b 100644
--- a/tests/acpi-test-data/q35/DSDT.ipmibt
+++ b/tests/acpi-test-data/q35/DSDT.ipmibt
Binary files differ
diff --git a/tests/acpi-test-data/q35/DSDT.memhp b/tests/acpi-test-data/q35/DSDT.memhp
new file mode 100644
index 0000000000..e46c1fb5a2
--- /dev/null
+++ b/tests/acpi-test-data/q35/DSDT.memhp
Binary files differ
diff --git a/tests/acpi-test-data/q35/SRAT.memhp b/tests/acpi-test-data/q35/SRAT.memhp
new file mode 100644
index 0000000000..66ce9a8981
--- /dev/null
+++ b/tests/acpi-test-data/q35/SRAT.memhp
Binary files differ
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 812f830539..54048050c0 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -867,6 +867,28 @@ static void test_acpi_piix4_tcg_ipmi(void)
free_test_data(&data);
}
+static void test_acpi_q35_tcg_memhp(void)
+{
+ test_data data;
+
+ memset(&data, 0, sizeof(data));
+ data.machine = MACHINE_Q35;
+ data.variant = ".memhp";
+ test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", &data);
+ free_test_data(&data);
+}
+
+static void test_acpi_piix4_tcg_memhp(void)
+{
+ test_data data;
+
+ memset(&data, 0, sizeof(data));
+ data.machine = MACHINE_PC;
+ data.variant = ".memhp";
+ test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", &data);
+ free_test_data(&data);
+}
+
int main(int argc, char *argv[])
{
const char *arch = qtest_get_arch();
@@ -887,6 +909,8 @@ int main(int argc, char *argv[])
qtest_add_func("acpi/q35/ipmi", test_acpi_q35_tcg_ipmi);
qtest_add_func("acpi/piix4/cpuhp", test_acpi_piix4_tcg_cphp);
qtest_add_func("acpi/q35/cpuhp", test_acpi_q35_tcg_cphp);
+ qtest_add_func("acpi/piix4/memhp", test_acpi_piix4_tcg_memhp);
+ qtest_add_func("acpi/q35/memhp", test_acpi_q35_tcg_memhp);
}
ret = g_test_run();
boot_sector_cleanup(disk);
diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out
index 8ff423f56b..dd879f1212 100644
--- a/tests/qemu-iotests/071.out
+++ b/tests/qemu-iotests/071.out
@@ -12,7 +12,7 @@ read 512/512 bytes at offset 229376
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
=== Testing blkverify through file blockref ===
@@ -26,7 +26,7 @@ read 512/512 bytes at offset 229376
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
=== Testing blkdebug through filename ===
@@ -56,7 +56,7 @@ QMP_VERSION
{"return": {}}
{"return": {}}
{"return": {}}
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
=== Testing blkverify on existing raw block device ===
@@ -66,7 +66,7 @@ QMP_VERSION
{"return": {}}
{"return": {}}
{"return": {}}
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
=== Testing blkdebug's set-state through QMP ===
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 775e031069..8618c20d53 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -30,17 +30,9 @@
#define _FILE_OFFSET_BITS 64
#include "qemu/osdep.h"
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/unistd.h>
-#include <sys/eventfd.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <linux/vhost.h>
-
-#include "qemu/atomic.h"
+#include "qemu/iov.h"
#include "standard-headers/linux/virtio_net.h"
-#include "standard-headers/linux/virtio_ring.h"
+#include "contrib/libvhost-user/libvhost-user.h"
#define VHOST_USER_BRIDGE_DEBUG 1
@@ -64,6 +56,17 @@ typedef struct Dispatcher {
Event events[FD_SETSIZE];
} Dispatcher;
+typedef struct VubrDev {
+ VuDev vudev;
+ Dispatcher dispatcher;
+ int backend_udp_sock;
+ struct sockaddr_in backend_udp_dest;
+ int hdrlen;
+ int sock;
+ int ready;
+ int quit;
+} VubrDev;
+
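
VubrDev embeds its VuDev, so libvhost-user callbacks that receive a VuDev * can recover the bridge's own state with container_of(), as vubr_handle_tx() does below. A stand-alone sketch of the idiom, with CONTAINER_OF as a stand-in for QEMU's container_of() (illustration only):

    #include <stddef.h>

    #define CONTAINER_OF(ptr, type, member) \
        ((type *) ((char *) (ptr) - offsetof(type, member)))

    /* in a callback: VubrDev *vubr = CONTAINER_OF(dev, VubrDev, vudev); */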
static void
vubr_die(const char *s)
{
@@ -101,8 +104,6 @@ dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
return 0;
}
-/* dispatcher_remove() is not currently in use but may be useful
- * in the future. */
static int
dispatcher_remove(Dispatcher *dispr, int sock)
{
@@ -157,1039 +158,313 @@ dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
return 0;
}
-typedef struct VubrVirtq {
- int call_fd;
- int kick_fd;
- uint32_t size;
- uint16_t last_avail_index;
- uint16_t last_used_index;
- struct vring_desc *desc;
- struct vring_avail *avail;
- struct vring_used *used;
- uint64_t log_guest_addr;
- int enable;
-} VubrVirtq;
-
-/* Based on qemu/hw/virtio/vhost-user.c */
-
-#define VHOST_MEMORY_MAX_NREGIONS 8
-#define VHOST_USER_F_PROTOCOL_FEATURES 30
-/* v1.0 compliant. */
-#define VIRTIO_F_VERSION_1 32
-
-#define VHOST_LOG_PAGE 4096
-
-enum VhostUserProtocolFeature {
- VHOST_USER_PROTOCOL_F_MQ = 0,
- VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
- VHOST_USER_PROTOCOL_F_RARP = 2,
-
- VHOST_USER_PROTOCOL_F_MAX
-};
-
-#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
-
-typedef enum VhostUserRequest {
- VHOST_USER_NONE = 0,
- VHOST_USER_GET_FEATURES = 1,
- VHOST_USER_SET_FEATURES = 2,
- VHOST_USER_SET_OWNER = 3,
- VHOST_USER_RESET_OWNER = 4,
- VHOST_USER_SET_MEM_TABLE = 5,
- VHOST_USER_SET_LOG_BASE = 6,
- VHOST_USER_SET_LOG_FD = 7,
- VHOST_USER_SET_VRING_NUM = 8,
- VHOST_USER_SET_VRING_ADDR = 9,
- VHOST_USER_SET_VRING_BASE = 10,
- VHOST_USER_GET_VRING_BASE = 11,
- VHOST_USER_SET_VRING_KICK = 12,
- VHOST_USER_SET_VRING_CALL = 13,
- VHOST_USER_SET_VRING_ERR = 14,
- VHOST_USER_GET_PROTOCOL_FEATURES = 15,
- VHOST_USER_SET_PROTOCOL_FEATURES = 16,
- VHOST_USER_GET_QUEUE_NUM = 17,
- VHOST_USER_SET_VRING_ENABLE = 18,
- VHOST_USER_SEND_RARP = 19,
- VHOST_USER_MAX
-} VhostUserRequest;
-
-typedef struct VhostUserMemoryRegion {
- uint64_t guest_phys_addr;
- uint64_t memory_size;
- uint64_t userspace_addr;
- uint64_t mmap_offset;
-} VhostUserMemoryRegion;
-
-typedef struct VhostUserMemory {
- uint32_t nregions;
- uint32_t padding;
- VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
-} VhostUserMemory;
-
-typedef struct VhostUserLog {
- uint64_t mmap_size;
- uint64_t mmap_offset;
-} VhostUserLog;
-
-typedef struct VhostUserMsg {
- VhostUserRequest request;
-
-#define VHOST_USER_VERSION_MASK (0x3)
-#define VHOST_USER_REPLY_MASK (0x1<<2)
- uint32_t flags;
- uint32_t size; /* the following payload size */
- union {
-#define VHOST_USER_VRING_IDX_MASK (0xff)
-#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
- uint64_t u64;
- struct vhost_vring_state state;
- struct vhost_vring_addr addr;
- VhostUserMemory memory;
- VhostUserLog log;
- } payload;
- int fds[VHOST_MEMORY_MAX_NREGIONS];
- int fd_num;
-} QEMU_PACKED VhostUserMsg;
-
-#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
-
-/* The version of the protocol we support */
-#define VHOST_USER_VERSION (0x1)
-
-#define MAX_NR_VIRTQUEUE (8)
-
-typedef struct VubrDevRegion {
- /* Guest Physical address. */
- uint64_t gpa;
- /* Memory region size. */
- uint64_t size;
- /* QEMU virtual address (userspace). */
- uint64_t qva;
- /* Starting offset in our mmaped space. */
- uint64_t mmap_offset;
- /* Start address of mmaped space. */
- uint64_t mmap_addr;
-} VubrDevRegion;
-
-typedef struct VubrDev {
- int sock;
- Dispatcher dispatcher;
- uint32_t nregions;
- VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
- VubrVirtq vq[MAX_NR_VIRTQUEUE];
- int log_call_fd;
- uint64_t log_size;
- uint8_t *log_table;
- int backend_udp_sock;
- struct sockaddr_in backend_udp_dest;
- int ready;
- uint64_t features;
- int hdrlen;
-} VubrDev;
-
-static const char *vubr_request_str[] = {
- [VHOST_USER_NONE] = "VHOST_USER_NONE",
- [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
- [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
- [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
- [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
- [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
- [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
- [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
- [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
- [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
- [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
- [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
- [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
- [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
- [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
- [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
- [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
- [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
- [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
- [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
- [VHOST_USER_MAX] = "VHOST_USER_MAX",
-};
-
static void
-print_buffer(uint8_t *buf, size_t len)
+vubr_handle_tx(VuDev *dev, int qidx)
{
- int i;
- printf("Raw buffer:\n");
- for (i = 0; i < len; i++) {
- if (i % 16 == 0) {
- printf("\n");
- }
- if (i % 4 == 0) {
- printf(" ");
- }
- printf("%02x ", buf[i]);
- }
- printf("\n............................................................\n");
-}
+ VuVirtq *vq = vu_get_queue(dev, qidx);
+ VubrDev *vubr = container_of(dev, VubrDev, vudev);
+ int hdrlen = vubr->hdrlen;
+ VuVirtqElement *elem = NULL;
-/* Translate guest physical address to our virtual address. */
-static uint64_t
-gpa_to_va(VubrDev *dev, uint64_t guest_addr)
-{
- int i;
-
- /* Find matching memory region. */
- for (i = 0; i < dev->nregions; i++) {
- VubrDevRegion *r = &dev->regions[i];
-
- if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
- return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
- }
- }
-
- assert(!"address not found in regions");
- return 0;
-}
-
-/* Translate qemu virtual address to our virtual address. */
-static uint64_t
-qva_to_va(VubrDev *dev, uint64_t qemu_addr)
-{
- int i;
+ assert(qidx % 2);
- /* Find matching memory region. */
- for (i = 0; i < dev->nregions; i++) {
- VubrDevRegion *r = &dev->regions[i];
+ for (;;) {
+ ssize_t ret;
+ unsigned int out_num;
+ struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
- if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
- return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+ if (!elem) {
+ break;
}
- }
-
- assert(!"address not found in regions");
- return 0;
-}
-static void
-vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
-{
- char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
- struct iovec iov = {
- .iov_base = (char *)vmsg,
- .iov_len = VHOST_USER_HDR_SIZE,
- };
- struct msghdr msg = {
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = control,
- .msg_controllen = sizeof(control),
- };
- size_t fd_size;
- struct cmsghdr *cmsg;
- int rc;
-
- rc = recvmsg(conn_fd, &msg, 0);
-
- if (rc == 0) {
- vubr_die("recvmsg");
- fprintf(stderr, "Peer disconnected.\n");
- exit(1);
- }
- if (rc < 0) {
- vubr_die("recvmsg");
- }
-
- vmsg->fd_num = 0;
- for (cmsg = CMSG_FIRSTHDR(&msg);
- cmsg != NULL;
- cmsg = CMSG_NXTHDR(&msg, cmsg))
- {
- if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
- fd_size = cmsg->cmsg_len - CMSG_LEN(0);
- vmsg->fd_num = fd_size / sizeof(int);
- memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
+ out_num = elem->out_num;
+ out_sg = elem->out_sg;
+ if (out_num < 1) {
+ fprintf(stderr, "virtio-net header not in first element\n");
break;
}
- }
-
- if (vmsg->size > sizeof(vmsg->payload)) {
- fprintf(stderr,
- "Error: too big message request: %d, size: vmsg->size: %u, "
- "while sizeof(vmsg->payload) = %zu\n",
- vmsg->request, vmsg->size, sizeof(vmsg->payload));
- exit(1);
- }
-
- if (vmsg->size) {
- rc = read(conn_fd, &vmsg->payload, vmsg->size);
- if (rc == 0) {
- vubr_die("recvmsg");
- fprintf(stderr, "Peer disconnected.\n");
- exit(1);
+ if (VHOST_USER_BRIDGE_DEBUG) {
+ iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
}
- if (rc < 0) {
- vubr_die("recvmsg");
+
+ if (hdrlen) {
+ unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
+ out_sg, out_num,
+ hdrlen, -1);
+ out_num = sg_num;
+ out_sg = sg;
}
- assert(rc == vmsg->size);
- }
-}
+ struct msghdr msg = {
+ .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
+ .msg_namelen = sizeof(struct sockaddr_in),
+ .msg_iov = out_sg,
+ .msg_iovlen = out_num,
+ };
+ do {
+ ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
+ } while (ret == -1 && (errno == EAGAIN || errno == EINTR));
-static void
-vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
-{
- int rc;
+ if (ret == -1) {
+ vubr_die("sendmsg()");
+ }
- do {
- rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
- } while (rc < 0 && errno == EINTR);
+ vu_queue_push(dev, vq, elem, 0);
+ vu_queue_notify(dev, vq);
- if (rc < 0) {
- vubr_die("write");
+ free(elem);
+ elem = NULL;
}
-}
-static void
-vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
-{
- int slen = sizeof(struct sockaddr_in);
-
- if (sendto(dev->backend_udp_sock, buf, len, 0,
- (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
- vubr_die("sendto()");
- }
+ free(elem);
}
-static int
-vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
+static void
+iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
{
- int slen = sizeof(struct sockaddr_in);
- int rc;
+ struct iovec *cur;
- rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
- (struct sockaddr *) &dev->backend_udp_dest,
- (socklen_t *)&slen);
- if (rc == -1) {
- vubr_die("recvfrom()");
+ for (cur = front; cur != iov; cur++) {
+ bytes -= cur->iov_len;
}
- return rc;
+ cur->iov_base -= bytes;
+ cur->iov_len += bytes;
}
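
iov_restore_front() undoes an earlier iov_discard_front() (from qemu/iov.h): entries the discard consumed entirely keep their original iov_len, so subtracting them while walking from front to the advanced head leaves exactly the bytes shaved off the partially consumed entry, which are then given back. A small worked example, assuming iov_restore_front() above is in scope (illustration only):

    #include <assert.h>
    #include <sys/uio.h>

    int main(void)
    {
        char h[4], p[8];
        struct iovec vec[2] = {
            { .iov_base = h, .iov_len = sizeof(h) },
            { .iov_base = p, .iov_len = sizeof(p) },
        };

        /* State after iov_discard_front(&sg, &num, 6): sg points at
         * vec[1], whose base advanced by 2 and length shrank to 6. */
        struct iovec *sg = &vec[1];
        vec[1].iov_base = p + 2;
        vec[1].iov_len = 6;

        iov_restore_front(vec, sg, 6);  /* 6 - vec[0].iov_len leaves 2 */
        assert(vec[1].iov_base == p && vec[1].iov_len == 8);
        return 0;
    }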
static void
-vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
+iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
{
- int hdrlen = dev->hdrlen;
- DPRINT(" hdrlen = %d\n", dev->hdrlen);
+ unsigned i;
- if (VHOST_USER_BRIDGE_DEBUG) {
- print_buffer(buf, len);
- }
- vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
-}
+ for (i = 0; i < iovc; i++, iov++) {
+ if (bytes < iov->iov_len) {
+ iov->iov_len = bytes;
+ return;
+ }
-/* Kick the log_call_fd if required. */
-static void
-vubr_log_kick(VubrDev *dev)
-{
- if (dev->log_call_fd != -1) {
- DPRINT("Kicking the QEMU's log...\n");
- eventfd_write(dev->log_call_fd, 1);
+ bytes -= iov->iov_len;
}
-}
-/* Kick the guest if necessary. */
-static void
-vubr_virtqueue_kick(VubrVirtq *vq)
-{
- if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
- DPRINT("Kicking the guest...\n");
- eventfd_write(vq->call_fd, 1);
- }
+ assert(!"couldn't truncate iov");
}
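
iov_truncate() caps the element's scatter list at the number of bytes actually received, so the length later passed to vu_queue_fill() matches the data. A worked example against the function above (illustration only):

    struct iovec v[2] = { { .iov_len = 4 }, { .iov_len = 8 } };

    iov_truncate(v, 2, 6);  /* leaves v[0].iov_len == 4, v[1].iov_len == 2 */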
static void
-vubr_log_page(uint8_t *log_table, uint64_t page)
+vubr_backend_recv_cb(int sock, void *ctx)
{
- DPRINT("Logged dirty guest page: %"PRId64"\n", page);
- atomic_or(&log_table[page / 8], 1 << (page % 8));
-}
+ VubrDev *vubr = (VubrDev *) ctx;
+ VuDev *dev = &vubr->vudev;
+ VuVirtq *vq = vu_get_queue(dev, 0);
+ VuVirtqElement *elem = NULL;
+ struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
+ struct virtio_net_hdr_mrg_rxbuf mhdr;
+ unsigned mhdr_cnt = 0;
+ int hdrlen = vubr->hdrlen;
+ int i = 0;
+ struct virtio_net_hdr hdr = {
+ .flags = 0,
+ .gso_type = VIRTIO_NET_HDR_GSO_NONE
+ };
-static void
-vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
-{
- uint64_t page;
+ DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
+ DPRINT(" hdrlen = %d\n", hdrlen);
- if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
- !dev->log_table || !length) {
+ if (!vu_queue_enabled(dev, vq) ||
+ !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
+ DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
return;
}
- assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
-
- page = address / VHOST_LOG_PAGE;
- while (page * VHOST_LOG_PAGE < address + length) {
- vubr_log_page(dev->log_table, page);
- page += VHOST_LOG_PAGE;
- }
- vubr_log_kick(dev);
-}
-
-static void
-vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
-{
- struct vring_desc *desc = vq->desc;
- struct vring_avail *avail = vq->avail;
- struct vring_used *used = vq->used;
- uint64_t log_guest_addr = vq->log_guest_addr;
- int32_t remaining_len = len;
-
- unsigned int size = vq->size;
-
- uint16_t avail_index = atomic_mb_read(&avail->idx);
-
- /* We check the available descriptors before posting the
- * buffer, so here we assume that enough available
- * descriptors. */
- assert(vq->last_avail_index != avail_index);
- uint16_t a_index = vq->last_avail_index % size;
- uint16_t u_index = vq->last_used_index % size;
- uint16_t d_index = avail->ring[a_index];
-
- int i = d_index;
- uint32_t written_len = 0;
-
do {
- DPRINT("Post packet to guest on vq:\n");
- DPRINT(" size = %d\n", vq->size);
- DPRINT(" last_avail_index = %d\n", vq->last_avail_index);
- DPRINT(" last_used_index = %d\n", vq->last_used_index);
- DPRINT(" a_index = %d\n", a_index);
- DPRINT(" u_index = %d\n", u_index);
- DPRINT(" d_index = %d\n", d_index);
- DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr);
- DPRINT(" desc[%d].len = %d\n", i, desc[i].len);
- DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags);
- DPRINT(" avail->idx = %d\n", avail_index);
- DPRINT(" used->idx = %d\n", used->idx);
-
- if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
- /* FIXME: we should find writable descriptor. */
- fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
- exit(1);
- }
-
- void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
- uint32_t chunk_len = desc[i].len;
- uint32_t chunk_write_len = MIN(remaining_len, chunk_len);
+ struct iovec *sg;
+ ssize_t ret, total = 0;
+ unsigned int num;
- memcpy(chunk_start, buf + written_len, chunk_write_len);
- vubr_log_write(dev, desc[i].addr, chunk_write_len);
- remaining_len -= chunk_write_len;
- written_len += chunk_write_len;
-
- if ((remaining_len == 0) || !(desc[i].flags & VRING_DESC_F_NEXT)) {
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+ if (!elem) {
break;
}
- i = desc[i].next;
- } while (1);
-
- if (remaining_len > 0) {
- fprintf(stderr,
- "Too long packet for RX, remaining_len = %d, Dropping...\n",
- remaining_len);
- return;
- }
-
- /* Add descriptor to the used ring. */
- used->ring[u_index].id = d_index;
- used->ring[u_index].len = len;
- vubr_log_write(dev,
- log_guest_addr + offsetof(struct vring_used, ring[u_index]),
- sizeof(used->ring[u_index]));
-
- vq->last_avail_index++;
- vq->last_used_index++;
-
- atomic_mb_set(&used->idx, vq->last_used_index);
- vubr_log_write(dev,
- log_guest_addr + offsetof(struct vring_used, idx),
- sizeof(used->idx));
-
- /* Kick the guest if necessary. */
- vubr_virtqueue_kick(vq);
-}
-
-static int
-vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
-{
- struct vring_desc *desc = vq->desc;
- struct vring_avail *avail = vq->avail;
- struct vring_used *used = vq->used;
- uint64_t log_guest_addr = vq->log_guest_addr;
-
- unsigned int size = vq->size;
-
- uint16_t a_index = vq->last_avail_index % size;
- uint16_t u_index = vq->last_used_index % size;
- uint16_t d_index = avail->ring[a_index];
-
- uint32_t i, len = 0;
- size_t buf_size = 4096;
- uint8_t buf[4096];
-
- DPRINT("Chunks: ");
- i = d_index;
- do {
- void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
- uint32_t chunk_len = desc[i].len;
-
- assert(!(desc[i].flags & VRING_DESC_F_WRITE));
-
- if (len + chunk_len < buf_size) {
- memcpy(buf + len, chunk_start, chunk_len);
- DPRINT("%d ", chunk_len);
- } else {
- fprintf(stderr, "Error: too long packet. Dropping...\n");
+ if (elem->in_num < 1) {
+ fprintf(stderr, "virtio-net contains no in buffers\n");
break;
}
- len += chunk_len;
-
- if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
- break;
+ sg = elem->in_sg;
+ num = elem->in_num;
+ if (i == 0) {
+ if (hdrlen == 12) {
+ mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
+ sg, elem->in_num,
+ offsetof(typeof(mhdr), num_buffers),
+ sizeof(mhdr.num_buffers));
+ }
+ iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
+ total += hdrlen;
+ assert(iov_discard_front(&sg, &num, hdrlen) == hdrlen);
}
- i = desc[i].next;
- } while (1);
- DPRINT("\n");
-
- if (!len) {
- return -1;
- }
-
- /* Add descriptor to the used ring. */
- used->ring[u_index].id = d_index;
- used->ring[u_index].len = len;
- vubr_log_write(dev,
- log_guest_addr + offsetof(struct vring_used, ring[u_index]),
- sizeof(used->ring[u_index]));
-
- vubr_consume_raw_packet(dev, buf, len);
-
- return 0;
-}
+ struct msghdr msg = {
+ .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
+ .msg_namelen = sizeof(struct sockaddr_in),
+ .msg_iov = sg,
+ .msg_iovlen = elem->in_num,
+ .msg_flags = MSG_DONTWAIT,
+ };
+ do {
+ ret = recvmsg(vubr->backend_udp_sock, &msg, 0);
+ } while (ret == -1 && (errno == EINTR));
-static void
-vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
-{
- struct vring_avail *avail = vq->avail;
- struct vring_used *used = vq->used;
- uint64_t log_guest_addr = vq->log_guest_addr;
-
- while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
- vubr_process_desc(dev, vq);
- vq->last_avail_index++;
- vq->last_used_index++;
- }
+ if (i == 0) {
+ iov_restore_front(elem->in_sg, sg, hdrlen);
+ }
- atomic_mb_set(&used->idx, vq->last_used_index);
- vubr_log_write(dev,
- log_guest_addr + offsetof(struct vring_used, idx),
- sizeof(used->idx));
-}
+ if (ret == -1) {
+ if (errno == EWOULDBLOCK) {
+ vu_queue_rewind(dev, vq, 1);
+ break;
+ }
-static void
-vubr_backend_recv_cb(int sock, void *ctx)
-{
- VubrDev *dev = (VubrDev *) ctx;
- VubrVirtq *rx_vq = &dev->vq[0];
- uint8_t buf[4096];
- struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
- int hdrlen = dev->hdrlen;
- int buflen = sizeof(buf);
- int len;
-
- if (!dev->ready) {
- return;
- }
+ vubr_die("recvmsg()");
+ }
- DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
- DPRINT(" hdrlen = %d\n", hdrlen);
+ total += ret;
+ iov_truncate(elem->in_sg, elem->in_num, total);
+ vu_queue_fill(dev, vq, elem, total, i++);
- uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
+ free(elem);
+ elem = NULL;
+ } while (false); /* could loop if DONTWAIT worked? */
- /* If there is no available descriptors, just do nothing.
- * The buffer will be handled by next arrived UDP packet,
- * or next kick on receive virtq. */
- if (rx_vq->last_avail_index == avail_index) {
- DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
- return;
+ if (mhdr_cnt) {
+ mhdr.num_buffers = i;
+ iov_from_buf(mhdr_sg, mhdr_cnt,
+ 0,
+ &mhdr.num_buffers, sizeof mhdr.num_buffers);
}
- memset(buf, 0, hdrlen);
- /* TODO: support mergeable buffers. */
- if (hdrlen == 12)
- hdr->num_buffers = 1;
- len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
+ vu_queue_flush(dev, vq, i);
+ vu_queue_notify(dev, vq);
- vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
+ free(elem);
}
static void
-vubr_kick_cb(int sock, void *ctx)
+vubr_receive_cb(int sock, void *ctx)
{
- VubrDev *dev = (VubrDev *) ctx;
- eventfd_t kick_data;
- ssize_t rc;
+ VubrDev *vubr = (VubrDev *)ctx;
- rc = eventfd_read(sock, &kick_data);
- if (rc == -1) {
- vubr_die("eventfd_read()");
- } else {
- DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
- vubr_process_avail(dev, &dev->vq[1]);
+ if (!vu_dispatch(&vubr->vudev)) {
+ fprintf(stderr, "Error while dispatching\n");
}
}
-static int
-vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
-}
+typedef struct WatchData {
+ VuDev *dev;
+ vu_watch_cb cb;
+ void *data;
+} WatchData;
-static int
-vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+watch_cb(int sock, void *ctx)
{
- vmsg->payload.u64 =
- ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VHOST_F_LOG_ALL) |
- (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
- (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
-
- vmsg->size = sizeof(vmsg->payload.u64);
+ struct WatchData *wd = ctx;
- DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
- /* Reply */
- return 1;
+ wd->cb(wd->dev, VU_WATCH_IN, wd->data);
}
-static int
-vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_set_watch(VuDev *dev, int fd, int condition,
+ vu_watch_cb cb, void *data)
{
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
- dev->features = vmsg->payload.u64;
- if ((dev->features & (1ULL << VIRTIO_F_VERSION_1)) ||
- (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
- dev->hdrlen = 12;
- } else {
- dev->hdrlen = 10;
- }
+ VubrDev *vubr = container_of(dev, VubrDev, vudev);
+ static WatchData watches[FD_SETSIZE];
+ struct WatchData *wd = &watches[fd];
- return 0;
-}
-
-static int
-vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- return 0;
+ wd->cb = cb;
+ wd->data = data;
+ wd->dev = dev;
+ dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
}
static void
-vubr_close_log(VubrDev *dev)
+vubr_remove_watch(VuDev *dev, int fd)
{
- if (dev->log_table) {
- if (munmap(dev->log_table, dev->log_size) != 0) {
- vubr_die("munmap()");
- }
+ VubrDev *vubr = container_of(dev, VubrDev, vudev);
- dev->log_table = 0;
- }
- if (dev->log_call_fd != -1) {
- close(dev->log_call_fd);
- dev->log_call_fd = -1;
- }
-}
-
-static int
-vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- vubr_close_log(dev);
- dev->ready = 0;
- dev->features = 0;
- return 0;
+ dispatcher_remove(&vubr->dispatcher, fd);
}
static int
-vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
+vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
{
- int i;
- VhostUserMemory *memory = &vmsg->payload.memory;
- dev->nregions = memory->nregions;
-
- DPRINT("Nregions: %d\n", memory->nregions);
- for (i = 0; i < dev->nregions; i++) {
- void *mmap_addr;
- VhostUserMemoryRegion *msg_region = &memory->regions[i];
- VubrDevRegion *dev_region = &dev->regions[i];
-
- DPRINT("Region %d\n", i);
- DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
- msg_region->guest_phys_addr);
- DPRINT(" memory_size: 0x%016"PRIx64"\n",
- msg_region->memory_size);
- DPRINT(" userspace_addr 0x%016"PRIx64"\n",
- msg_region->userspace_addr);
- DPRINT(" mmap_offset 0x%016"PRIx64"\n",
- msg_region->mmap_offset);
-
- dev_region->gpa = msg_region->guest_phys_addr;
- dev_region->size = msg_region->memory_size;
- dev_region->qva = msg_region->userspace_addr;
- dev_region->mmap_offset = msg_region->mmap_offset;
-
- /* We don't use offset argument of mmap() since the
- * mapped address has to be page aligned, and we use huge
- * pages. */
- mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- vmsg->fds[i], 0);
-
- if (mmap_addr == MAP_FAILED) {
- vubr_die("mmap");
- }
- dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
- DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr);
-
- close(vmsg->fds[i]);
- }
-
+ DPRINT("Function %s() not implemented yet.\n", __func__);
return 0;
}
static int
-vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
{
- int fd;
- uint64_t log_mmap_size, log_mmap_offset;
- void *rc;
-
- assert(vmsg->fd_num == 1);
- fd = vmsg->fds[0];
-
- assert(vmsg->size == sizeof(vmsg->payload.log));
- log_mmap_offset = vmsg->payload.log.mmap_offset;
- log_mmap_size = vmsg->payload.log.mmap_size;
- DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
- DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
-
- rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
- log_mmap_offset);
- if (rc == MAP_FAILED) {
- vubr_die("mmap");
+ switch (vmsg->request) {
+ case VHOST_USER_SEND_RARP:
+ *do_reply = vubr_send_rarp_exec(dev, vmsg);
+ return 1;
+ default:
+ /* let the library handle the rest */
+ return 0;
}
- dev->log_table = rc;
- dev->log_size = log_mmap_size;
- vmsg->size = sizeof(vmsg->payload.u64);
- /* Reply */
- return 1;
-}
-
-static int
-vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- assert(vmsg->fd_num == 1);
- dev->log_call_fd = vmsg->fds[0];
- DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
return 0;
}
-static int
-vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_set_features(VuDev *dev, uint64_t features)
{
- unsigned int index = vmsg->payload.state.index;
- unsigned int num = vmsg->payload.state.num;
-
- DPRINT("State.index: %d\n", index);
- DPRINT("State.num: %d\n", num);
- dev->vq[index].size = num;
- return 0;
-}
+ VubrDev *vubr = container_of(dev, VubrDev, vudev);
-static int
-vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- struct vhost_vring_addr *vra = &vmsg->payload.addr;
- unsigned int index = vra->index;
- VubrVirtq *vq = &dev->vq[index];
-
- DPRINT("vhost_vring_addr:\n");
- DPRINT(" index: %d\n", vra->index);
- DPRINT(" flags: %d\n", vra->flags);
- DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr);
- DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr);
- DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr);
- DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr);
-
- vq->desc = (struct vring_desc *)(uintptr_t)qva_to_va(dev, vra->desc_user_addr);
- vq->used = (struct vring_used *)(uintptr_t)qva_to_va(dev, vra->used_user_addr);
- vq->avail = (struct vring_avail *)(uintptr_t)qva_to_va(dev, vra->avail_user_addr);
- vq->log_guest_addr = vra->log_guest_addr;
-
- DPRINT("Setting virtq addresses:\n");
- DPRINT(" vring_desc at %p\n", vq->desc);
- DPRINT(" vring_used at %p\n", vq->used);
- DPRINT(" vring_avail at %p\n", vq->avail);
-
- vq->last_used_index = vq->used->idx;
-
- if (vq->last_avail_index != vq->used->idx) {
- DPRINT("Last avail index != used index: %d != %d, resuming",
- vq->last_avail_index, vq->used->idx);
- vq->last_avail_index = vq->used->idx;
+ if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
+ (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
+ vubr->hdrlen = 12;
+ } else {
+ vubr->hdrlen = 10;
}
-
- return 0;
}
-static int
-vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- unsigned int index = vmsg->payload.state.index;
- unsigned int num = vmsg->payload.state.num;
-
- DPRINT("State.index: %d\n", index);
- DPRINT("State.num: %d\n", num);
- dev->vq[index].last_avail_index = num;
-
- return 0;
-}
-
-static int
-vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- unsigned int index = vmsg->payload.state.index;
-
- DPRINT("State.index: %d\n", index);
- vmsg->payload.state.num = dev->vq[index].last_avail_index;
- vmsg->size = sizeof(vmsg->payload.state);
- /* FIXME: this is a work-around for a bug in QEMU enabling
- * too early vrings. When protocol features are enabled,
- * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */
- dev->ready = 0;
-
- if (dev->vq[index].call_fd != -1) {
- close(dev->vq[index].call_fd);
- dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
- dev->vq[index].call_fd = -1;
- }
- if (dev->vq[index].kick_fd != -1) {
- close(dev->vq[index].kick_fd);
- dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
- dev->vq[index].kick_fd = -1;
- }
-
- /* Reply */
- return 1;
-}
-
-static int
-vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static uint64_t
+vubr_get_features(VuDev *dev)
{
- uint64_t u64_arg = vmsg->payload.u64;
- int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
-
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
- assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
- assert(vmsg->fd_num == 1);
-
- if (dev->vq[index].kick_fd != -1) {
- close(dev->vq[index].kick_fd);
- dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
- }
- dev->vq[index].kick_fd = vmsg->fds[0];
- DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
-
- if (index % 2 == 1) {
- /* TX queue. */
- dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
- dev, vubr_kick_cb);
-
- DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
- dev->vq[index].kick_fd, index);
- }
- /* We temporarily use this hack to determine that both TX and RX
- * queues are set up and ready for processing.
- * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and
- * actual kicks. */
- if (dev->vq[0].kick_fd != -1 &&
- dev->vq[1].kick_fd != -1) {
- dev->ready = 1;
- DPRINT("vhost-user-bridge is ready for processing queues.\n");
- }
- return 0;
-
+ return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
+ 1ULL << VIRTIO_NET_F_MRG_RXBUF;
}
-static int
-vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_queue_set_started(VuDev *dev, int qidx, bool started)
{
- uint64_t u64_arg = vmsg->payload.u64;
- int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
+ VuVirtq *vq = vu_get_queue(dev, qidx);
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
- assert(vmsg->fd_num == 1);
-
- if (dev->vq[index].call_fd != -1) {
- close(dev->vq[index].call_fd);
- dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
+ if (qidx % 2 == 1) {
+ vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
}
- dev->vq[index].call_fd = vmsg->fds[0];
- DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
-
- return 0;
-}
-
-static int
-vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- return 0;
-}
-
-static int
-vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- vmsg->size = sizeof(vmsg->payload.u64);
-
- /* Reply */
- return 1;
}
-static int
-vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- /* FIXME: unimplented */
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- return 0;
-}
-
-static int
-vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
-}
-
-static int
-vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_panic(VuDev *dev, const char *msg)
{
- unsigned int index = vmsg->payload.state.index;
- unsigned int enable = vmsg->payload.state.num;
+ VubrDev *vubr = container_of(dev, VubrDev, vudev);
- DPRINT("State.index: %d\n", index);
- DPRINT("State.enable: %d\n", enable);
- dev->vq[index].enable = enable;
- return 0;
-}
+ fprintf(stderr, "PANIC: %s\n", msg);
-static int
-vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
+ dispatcher_remove(&vubr->dispatcher, dev->sock);
+ vubr->quit = 1;
}
-static int
-vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
-{
- /* Print out generic part of the request. */
- DPRINT(
- "================== Vhost user message from QEMU ==================\n");
- DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
- vmsg->request);
- DPRINT("Flags: 0x%x\n", vmsg->flags);
- DPRINT("Size: %d\n", vmsg->size);
-
- if (vmsg->fd_num) {
- int i;
- DPRINT("Fds:");
- for (i = 0; i < vmsg->fd_num; i++) {
- DPRINT(" %d", vmsg->fds[i]);
- }
- DPRINT("\n");
- }
-
- switch (vmsg->request) {
- case VHOST_USER_NONE:
- return vubr_none_exec(dev, vmsg);
- case VHOST_USER_GET_FEATURES:
- return vubr_get_features_exec(dev, vmsg);
- case VHOST_USER_SET_FEATURES:
- return vubr_set_features_exec(dev, vmsg);
- case VHOST_USER_SET_OWNER:
- return vubr_set_owner_exec(dev, vmsg);
- case VHOST_USER_RESET_OWNER:
- return vubr_reset_device_exec(dev, vmsg);
- case VHOST_USER_SET_MEM_TABLE:
- return vubr_set_mem_table_exec(dev, vmsg);
- case VHOST_USER_SET_LOG_BASE:
- return vubr_set_log_base_exec(dev, vmsg);
- case VHOST_USER_SET_LOG_FD:
- return vubr_set_log_fd_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_NUM:
- return vubr_set_vring_num_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_ADDR:
- return vubr_set_vring_addr_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_BASE:
- return vubr_set_vring_base_exec(dev, vmsg);
- case VHOST_USER_GET_VRING_BASE:
- return vubr_get_vring_base_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_KICK:
- return vubr_set_vring_kick_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_CALL:
- return vubr_set_vring_call_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_ERR:
- return vubr_set_vring_err_exec(dev, vmsg);
- case VHOST_USER_GET_PROTOCOL_FEATURES:
- return vubr_get_protocol_features_exec(dev, vmsg);
- case VHOST_USER_SET_PROTOCOL_FEATURES:
- return vubr_set_protocol_features_exec(dev, vmsg);
- case VHOST_USER_GET_QUEUE_NUM:
- return vubr_get_queue_num_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_ENABLE:
- return vubr_set_vring_enable_exec(dev, vmsg);
- case VHOST_USER_SEND_RARP:
- return vubr_send_rarp_exec(dev, vmsg);
-
- case VHOST_USER_MAX:
- assert(vmsg->request != VHOST_USER_MAX);
- }
- return 0;
-}
-
-static void
-vubr_receive_cb(int sock, void *ctx)
-{
- VubrDev *dev = (VubrDev *) ctx;
- VhostUserMsg vmsg;
- int reply_requested;
-
- vubr_message_read(sock, &vmsg);
- reply_requested = vubr_execute_request(dev, &vmsg);
- if (reply_requested) {
- /* Set the version in the flags when sending the reply */
- vmsg.flags &= ~VHOST_USER_VERSION_MASK;
- vmsg.flags |= VHOST_USER_VERSION;
- vmsg.flags |= VHOST_USER_REPLY_MASK;
- vubr_message_write(sock, &vmsg);
- }
-}
+static const VuDevIface vuiface = {
+ .get_features = vubr_get_features,
+ .set_features = vubr_set_features,
+ .process_msg = vubr_process_msg,
+ .queue_set_started = vubr_queue_set_started,
+};
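
With this VuDevIface filled in, the vhost-user protocol handling collapses into the libvhost-user lifecycle. A sketch of the resulting flow, using only calls visible in this patch (error handling omitted):

    vu_init(&dev->vudev, conn_fd,
            vubr_panic, vubr_set_watch, vubr_remove_watch, &vuiface);

    while (!dev->quit) {
        /* whenever conn_fd becomes readable: */
        if (!vu_dispatch(&dev->vudev)) {
            break;                      /* protocol error */
        }
    }

    vu_deinit(&dev->vudev);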
static void
vubr_accept_cb(int sock, void *ctx)
@@ -1204,36 +479,26 @@ vubr_accept_cb(int sock, void *ctx)
vubr_die("accept()");
}
DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
+
+ vu_init(&dev->vudev,
+ conn_fd,
+ vubr_panic,
+ vubr_set_watch,
+ vubr_remove_watch,
+ &vuiface);
+
dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
+ dispatcher_remove(&dev->dispatcher, sock);
}
static VubrDev *
vubr_new(const char *path, bool client)
{
VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
- dev->nregions = 0;
- int i;
struct sockaddr_un un;
CallbackFunc cb;
size_t len;
- for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
- dev->vq[i] = (VubrVirtq) {
- .call_fd = -1, .kick_fd = -1,
- .size = 0,
- .last_avail_index = 0, .last_used_index = 0,
- .desc = 0, .avail = 0, .used = 0,
- .enable = 0,
- };
- }
-
- /* Init log */
- dev->log_call_fd = -1;
- dev->log_size = 0;
- dev->log_table = 0;
- dev->ready = 0;
- dev->features = 0;
-
/* Get a UNIX socket. */
dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
if (dev->sock == -1) {
@@ -1261,10 +526,17 @@ vubr_new(const char *path, bool client)
if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
vubr_die("connect");
}
+ vu_init(&dev->vudev,
+ dev->sock,
+ vubr_panic,
+ vubr_set_watch,
+ vubr_remove_watch,
+ &vuiface);
cb = vubr_receive_cb;
}
dispatcher_init(&dev->dispatcher);
+
dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
return dev;
@@ -1345,7 +617,7 @@ vubr_backend_udp_setup(VubrDev *dev,
static void
vubr_run(VubrDev *dev)
{
- while (1) {
+ while (!dev->quit) {
/* timeout 200ms */
dispatcher_wait(&dev->dispatcher, 200000);
/* Here one can try polling strategy. */
@@ -1421,6 +693,9 @@ main(int argc, char *argv[])
vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
vubr_run(dev);
+
+ vu_deinit(&dev->vudev);
+
return 0;
out:
diff --git a/translate-all.c b/translate-all.c
index 3dd9214904..20262938bb 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -753,7 +753,7 @@ static inline void *alloc_code_gen_buffer(void)
size_t size2;
void *buf2 = mmap(NULL, size + qemu_real_host_page_size,
PROT_NONE, flags, -1, 0);
- switch (buf2 != MAP_FAILED) {
+ switch ((int)(buf2 != MAP_FAILED)) {
case 1:
if (!cross_256mb(buf2, size)) {
/* Success! Use the new buffer. */
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 737bffa984..a5d2f6c0c3 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -131,6 +131,13 @@ void qemu_coroutine_enter(Coroutine *co)
}
}
+void qemu_coroutine_enter_if_inactive(Coroutine *co)
+{
+ if (!qemu_coroutine_entered(co)) {
+ qemu_coroutine_enter(co);
+ }
+}
+
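
A hypothetical caller (sketch; Work and work_done_cb are illustrative names, not QEMU API): a completion callback that may run either while the coroutine is still active or after it has yielded no longer needs to track that state by hand:

    typedef struct Work {
        Coroutine *co;
        bool done;
    } Work;

    static void work_done_cb(void *opaque)
    {
        Work *w = opaque;

        w->done = true;
        /* no-op while w->co is still entered; resumes it if it yielded */
        qemu_coroutine_enter_if_inactive(w->co);
    }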
void coroutine_fn qemu_coroutine_yield(void)
{
Coroutine *self = qemu_coroutine_self();
diff --git a/vl.c b/vl.c
index d77dd862f9..a23de18fa9 100644
--- a/vl.c
+++ b/vl.c
@@ -2859,7 +2859,8 @@ static bool object_create_initial(const char *type)
g_str_equal(type, "filter-mirror") ||
g_str_equal(type, "filter-redirector") ||
g_str_equal(type, "colo-compare") ||
- g_str_equal(type, "filter-rewriter")) {
+ g_str_equal(type, "filter-rewriter") ||
+ g_str_equal(type, "filter-replay")) {
return false;
}