aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2014-06-20 18:01:24 +0100
committerPeter Maydell <peter.maydell@linaro.org>2014-06-20 18:01:24 +0100
commit0a99aae5fab5ed260aab96049c274b0334eb4085 (patch)
tree7db67e570b622a37a2139da871b79b0386942e4b
parent53001c148340127c2dca1f90329804cd0ac0e236 (diff)
parent705456c0d7f24fbd76733c891525b8eeea332e8b (diff)
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc,pci,virtio,hotplug fixes, enhancements numa work by Hu Tao and others memory hotplug by Igor vhost-user by Nikolay, Antonios and others guest virtio announcements by Jason qtest fixes by Sergey qdev hotplug fixes by Paolo misc other fixes mostly by myself Signed-off-by: Michael S. Tsirkin <mst@redhat.com> * remotes/mst/tags/for_upstream: (109 commits) numa: use RAM_ADDR_FMT with ram_addr_t qapi/string-output-visitor: fix bugs tests: simplify code qapi: fix input visitor bugs acpi: rephrase comment qmp: add ACPI_DEVICE_OST event handling qmp: add query-acpi-ospm-status command acpi: implement ospm_status() method for PIIX4/ICH9_LPC devices acpi: introduce TYPE_ACPI_DEVICE_IF interface qmp: add query-memory-devices command numa: handle mmaped memory allocation failure correctly pc: acpi: do not hardcode preprocessor qmp: clean out whitespace qdev: recursively unrealize devices when unrealizing bus qdev: reorganize error reporting in bus_set_realized qapi: fix build on glib < 2.28 qapi: make string output visitor parse int list qapi: make string input visitor parse int list tests: fix memory leak in test of string input visitor hmp: add info memdev ... Conflicts: include/hw/i386/pc.h [PMM: fixed minor conflict in pc.h] Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--Makefile.target2
-rw-r--r--backends/Makefile.objs3
-rw-r--r--backends/hostmem-file.c134
-rw-r--r--backends/hostmem-ram.c53
-rw-r--r--backends/hostmem.c375
-rwxr-xr-xconfigure35
-rw-r--r--cpus.c14
-rw-r--r--default-configs/i386-softmmu.mak1
-rw-r--r--default-configs/x86_64-softmmu.mak1
-rw-r--r--docs/qmp/qmp-events.txt10
-rw-r--r--docs/specs/acpi_mem_hotplug.txt44
-rw-r--r--docs/specs/vhost-user.txt266
-rw-r--r--exec.c211
-rw-r--r--hmp-commands.hx4
-rw-r--r--hmp.c36
-rw-r--r--hmp.h1
-rw-r--r--hw/9pfs/virtio-9p.c4
-rw-r--r--hw/Makefile.objs1
-rw-r--r--hw/acpi/Makefile.objs2
-rw-r--r--hw/acpi/acpi_interface.c15
-rw-r--r--hw/acpi/ich9.c69
-rw-r--r--hw/acpi/memory_hotplug.c245
-rw-r--r--hw/acpi/piix4.c85
-rw-r--r--hw/core/qdev.c57
-rw-r--r--hw/i386/Makefile.objs5
-rw-r--r--hw/i386/acpi-build.c75
-rw-r--r--hw/i386/acpi-dsdt.dsl7
-rw-r--r--hw/i386/acpi-dsdt.hex.generated31
-rw-r--r--hw/i386/pc.c252
-rw-r--r--hw/i386/pc_piix.c59
-rw-r--r--hw/i386/pc_q35.c29
-rw-r--r--hw/i386/q35-acpi-dsdt.dsl7
-rw-r--r--hw/i386/q35-acpi-dsdt.hex.generated31
-rw-r--r--hw/i386/ssdt-mem.dsl77
-rw-r--r--hw/i386/ssdt-mem.hex.generated213
-rw-r--r--hw/i386/ssdt-misc.dsl164
-rw-r--r--hw/i386/ssdt-misc.hex.generated811
-rw-r--r--hw/i386/ssdt-pcihp.hex.generated6
-rw-r--r--hw/i386/ssdt-proc.hex.generated6
-rw-r--r--hw/isa/lpc_ich9.c38
-rw-r--r--hw/mem/Makefile.objs1
-rw-r--r--hw/mem/pc-dimm.c281
-rw-r--r--hw/mips/mips_malta.c2
-rw-r--r--hw/net/vhost_net.c228
-rw-r--r--hw/net/virtio-net.c98
-rw-r--r--hw/ppc/spapr.c11
-rw-r--r--hw/scsi/vhost-scsi.c45
-rw-r--r--hw/virtio/Makefile.objs2
-rw-r--r--hw/virtio/vhost-backend.c71
-rw-r--r--hw/virtio/vhost-user.c342
-rw-r--r--hw/virtio/vhost.c90
-rw-r--r--hw/virtio/virtio.c16
-rw-r--r--include/exec/cpu-all.h8
-rw-r--r--include/exec/cpu-common.h2
-rw-r--r--include/exec/memory.h41
-rw-r--r--include/exec/ram_addr.h4
-rw-r--r--include/hw/acpi/acpi.h5
-rw-r--r--include/hw/acpi/acpi_dev_interface.h43
-rw-r--r--include/hw/acpi/cpu_hotplug.h2
-rw-r--r--include/hw/acpi/cpu_hotplug_defs.h32
-rw-r--r--include/hw/acpi/ich9.h7
-rw-r--r--include/hw/acpi/memory_hotplug.h38
-rw-r--r--include/hw/acpi/pc-hotplug.h56
-rw-r--r--include/hw/boards.h16
-rw-r--r--include/hw/i386/ich9.h2
-rw-r--r--include/hw/i386/pc.h79
-rw-r--r--include/hw/mem/pc-dimm.h81
-rw-r--r--include/hw/virtio/vhost-backend.h38
-rw-r--r--include/hw/virtio/vhost.h13
-rw-r--r--include/hw/virtio/virtio-net.h17
-rw-r--r--include/migration/vmstate.h10
-rw-r--r--include/monitor/monitor.h1
-rw-r--r--include/net/vhost-user.h17
-rw-r--r--include/net/vhost_net.h11
-rw-r--r--include/qemu/osdep.h16
-rw-r--r--include/qemu/range.h72
-rw-r--r--include/qom/object.h28
-rw-r--r--include/sysemu/char.h44
-rw-r--r--include/sysemu/cpus.h1
-rw-r--r--include/sysemu/hostmem.h68
-rw-r--r--include/sysemu/kvm.h11
-rw-r--r--include/sysemu/os-win32.h2
-rw-r--r--include/sysemu/sysemu.h18
-rw-r--r--kvm-all.c4
-rw-r--r--kvm-stub.c1
-rw-r--r--memory.c39
-rw-r--r--monitor.c10
-rw-r--r--net/Makefile.objs2
-rw-r--r--net/clients.h3
-rw-r--r--net/hub.c1
-rw-r--r--net/net.c7
-rw-r--r--net/tap.c18
-rw-r--r--net/vhost-user.c258
-rw-r--r--numa.c369
-rw-r--r--qapi-schema.json208
-rw-r--r--qapi/string-input-visitor.c201
-rw-r--r--qapi/string-output-visitor.c235
-rw-r--r--qemu-char.c277
-rw-r--r--qemu-options.hx45
-rw-r--r--qmp-commands.hx89
-rw-r--r--qmp.c44
-rw-r--r--qom/object.c35
-rw-r--r--savevm.c3
-rw-r--r--stubs/Makefile.objs9
-rw-r--r--stubs/bdrv-commit-all.c7
-rw-r--r--stubs/chr-msmouse.c7
-rw-r--r--stubs/get-next-serial.c3
-rw-r--r--stubs/is-daemonized.c9
-rw-r--r--stubs/machine-init-done.c6
-rw-r--r--stubs/monitor-init.c6
-rw-r--r--stubs/notify-event.c6
-rw-r--r--stubs/qmp_pc_dimm_device_list.c7
-rw-r--r--stubs/vc-init.c7
-rw-r--r--tests/Makefile4
-rw-r--r--tests/acpi-test-data/pc/DSDTbin4480 -> 4499 bytes
-rw-r--r--tests/acpi-test-data/pc/SSDTbin2269 -> 3065 bytes
-rw-r--r--tests/acpi-test-data/q35/DSDTbin7378 -> 7397 bytes
-rw-r--r--tests/acpi-test-data/q35/SSDTbin550 -> 1346 bytes
-rw-r--r--tests/test-string-input-visitor.c37
-rw-r--r--tests/test-string-output-visitor.c38
-rw-r--r--tests/vhost-user-test.c312
-rw-r--r--trace-events17
-rw-r--r--translate-all.c7
-rw-r--r--util/oslib-posix.c73
-rw-r--r--util/oslib-win32.c19
-rw-r--r--vl.c272
126 files changed, 7416 insertions, 728 deletions
diff --git a/Makefile.target b/Makefile.target
index 06c1e59bc4..fc5827cd72 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -119,7 +119,7 @@ endif #CONFIG_BSD_USER
#########################################################
# System emulator target
ifdef CONFIG_SOFTMMU
-obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o
+obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
obj-y += qtest.o
obj-y += hw/
obj-$(CONFIG_FDT) += device_tree.o
diff --git a/backends/Makefile.objs b/backends/Makefile.objs
index 591ddcf6f3..506a46c33b 100644
--- a/backends/Makefile.objs
+++ b/backends/Makefile.objs
@@ -6,3 +6,6 @@ common-obj-$(CONFIG_BRLAPI) += baum.o
baum.o-cflags := $(SDL_CFLAGS)
common-obj-$(CONFIG_TPM) += tpm.o
+
+common-obj-y += hostmem.o hostmem-ram.o
+common-obj-$(CONFIG_LINUX) += hostmem-file.o
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
new file mode 100644
index 0000000000..51799943f1
--- /dev/null
+++ b/backends/hostmem-file.c
@@ -0,0 +1,134 @@
+/*
+ * QEMU Host Memory Backend for hugetlbfs
+ *
+ * Copyright (C) 2013-2014 Red Hat Inc
+ *
+ * Authors:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu-common.h"
+#include "sysemu/hostmem.h"
+#include "sysemu/sysemu.h"
+#include "qom/object_interfaces.h"
+
+/* hostmem-file.c */
+/**
+ * @TYPE_MEMORY_BACKEND_FILE:
+ * name of backend that uses mmap on a file descriptor
+ */
+#define TYPE_MEMORY_BACKEND_FILE "memory-backend-file"
+
+#define MEMORY_BACKEND_FILE(obj) \
+ OBJECT_CHECK(HostMemoryBackendFile, (obj), TYPE_MEMORY_BACKEND_FILE)
+
+typedef struct HostMemoryBackendFile HostMemoryBackendFile;
+
+/* Instance state of a "memory-backend-file" object. */
+struct HostMemoryBackendFile {
+ HostMemoryBackend parent_obj;
+
+ /* value of the "share" property; handed to
+  * memory_region_init_ram_from_file() when the memory is allocated */
+ bool share;
+ /* value of the "mem-path" property; a private copy made with g_strdup() */
+ char *mem_path;
+};
+
+/*
+ * Allocation hook for the file backend.
+ *
+ * Rejects a zero size or an unset mem-path with an error in @errp.  On
+ * Linux hosts, creates the file-backed RAM region if it does not exist
+ * yet; on other hosts the request always fails with an error.
+ */
+static void
+file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
+{
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend);
+
+    if (!backend->size) {
+        error_setg(errp, "can't create backend with size 0");
+        return;
+    }
+    if (!fb->mem_path) {
+        error_setg(errp, "mem_path property not set");
+        return;
+    }
+#ifndef CONFIG_LINUX
+    error_setg(errp, "-mem-path not supported on this host");
+#else
+    if (!memory_region_size(&backend->mr)) {
+        /* The canonical path is a freshly allocated string; keep a handle
+         * on it so it can be released once the region is initialized
+         * instead of being leaked. */
+        char *path = object_get_canonical_path(OBJECT(backend));
+
+        backend->force_prealloc = mem_prealloc;
+        memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
+                                         path,
+                                         backend->size, fb->share,
+                                         fb->mem_path, errp);
+        g_free(path);
+    }
+#endif
+}
+
+/* Class initializer: plugs the file-specific allocation hook into the
+ * memory backend class. */
+static void
+file_backend_class_init(ObjectClass *oc, void *data)
+{
+    MEMORY_BACKEND_CLASS(oc)->alloc = file_backend_memory_alloc;
+}
+
+/* "mem-path" getter: returns a newly allocated copy of the stored path. */
+static char *get_mem_path(Object *o, Error **errp)
+{
+    return g_strdup(MEMORY_BACKEND_FILE(o)->mem_path);
+}
+
+/*
+ * "mem-path" setter.
+ *
+ * Refuses to change the path once the backing region has a non-zero size
+ * (error reported via @errp); otherwise replaces the stored path with a
+ * private copy of @str.
+ */
+static void set_mem_path(Object *o, const char *str, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(o);
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+
+    if (memory_region_size(&backend->mr)) {
+        error_setg(errp, "cannot change property value");
+        return;
+    }
+    /* g_free() accepts NULL, so no guard is needed for the first set */
+    g_free(fb->mem_path);
+    fb->mem_path = g_strdup(str);
+}
+
+/* "share" getter: reports the stored flag. */
+static bool file_memory_backend_get_share(Object *o, Error **errp)
+{
+    return MEMORY_BACKEND_FILE(o)->share;
+}
+
+/* "share" setter: only accepted while the backing region still has size
+ * zero; afterwards an error is reported via @errp. */
+static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
+{
+    HostMemoryBackend *base = MEMORY_BACKEND(o);
+    HostMemoryBackendFile *file = MEMORY_BACKEND_FILE(o);
+
+    if (!memory_region_size(&base->mr)) {
+        file->share = value;
+        return;
+    }
+
+    error_setg(errp, "cannot change property value");
+}
+
+/* Instance initializer: registers the "share" (bool) and "mem-path"
+ * (string) properties with their getter/setter pairs. */
+static void
+file_backend_instance_init(Object *o)
+{
+ object_property_add_bool(o, "share",
+ file_memory_backend_get_share,
+ file_memory_backend_set_share, NULL);
+ object_property_add_str(o, "mem-path", get_mem_path,
+ set_mem_path, NULL);
+}
+
+/* Type description for "memory-backend-file", derived from the generic
+ * memory backend type. */
+static const TypeInfo file_backend_info = {
+    .name          = TYPE_MEMORY_BACKEND_FILE,
+    .parent        = TYPE_MEMORY_BACKEND,
+    .instance_size = sizeof(HostMemoryBackendFile),
+    .instance_init = file_backend_instance_init,
+    .class_init    = file_backend_class_init,
+};
+
+/* Registers the file backend type; hooked up through type_init() below. */
+static void register_types(void)
+{
+ type_register_static(&file_backend_info);
+}
+
+type_init(register_types);
diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
new file mode 100644
index 0000000000..d9a8290dc9
--- /dev/null
+++ b/backends/hostmem-ram.c
@@ -0,0 +1,53 @@
+/*
+ * QEMU Host Memory Backend
+ *
+ * Copyright (C) 2013-2014 Red Hat Inc
+ *
+ * Authors:
+ * Igor Mammedov <imammedo@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "sysemu/hostmem.h"
+#include "qom/object_interfaces.h"
+
+#define TYPE_MEMORY_BACKEND_RAM "memory-backend-ram"
+
+
+/*
+ * Allocation hook for the RAM backend: creates a RAM region of
+ * backend->size bytes named after the object's canonical path component.
+ * A zero size is rejected with an error in @errp.
+ */
+static void
+ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
+{
+    char *region_name;
+
+    if (backend->size == 0) {
+        error_setg(errp, "can't create backend with size 0");
+        return;
+    }
+
+    region_name = object_get_canonical_path_component(OBJECT(backend));
+    memory_region_init_ram(&backend->mr, OBJECT(backend), region_name,
+                           backend->size);
+    /* the name string is released here once the region is initialized */
+    g_free(region_name);
+}
+
+/* Class initializer: plugs the RAM allocation hook into the memory
+ * backend class. */
+static void
+ram_backend_class_init(ObjectClass *oc, void *data)
+{
+    MEMORY_BACKEND_CLASS(oc)->alloc = ram_backend_memory_alloc;
+}
+
+/* Type description for "memory-backend-ram"; it adds no instance state of
+ * its own, only the class initializer. */
+static const TypeInfo ram_backend_info = {
+    .parent     = TYPE_MEMORY_BACKEND,
+    .name       = TYPE_MEMORY_BACKEND_RAM,
+    .class_init = ram_backend_class_init,
+};
+
+/* Registers the RAM backend type; hooked up through type_init() below. */
+static void register_types(void)
+{
+ type_register_static(&ram_backend_info);
+}
+
+type_init(register_types);
diff --git a/backends/hostmem.c b/backends/hostmem.c
new file mode 100644
index 0000000000..ca10c51b51
--- /dev/null
+++ b/backends/hostmem.c
@@ -0,0 +1,375 @@
+/*
+ * QEMU Host Memory Backend
+ *
+ * Copyright (C) 2013-2014 Red Hat Inc
+ *
+ * Authors:
+ * Igor Mammedov <imammedo@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "sysemu/hostmem.h"
+#include "qapi/visitor.h"
+#include "qapi-types.h"
+#include "qapi-visit.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/config-file.h"
+#include "qom/object_interfaces.h"
+
+#ifdef CONFIG_NUMA
+#include <numaif.h>
+QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
+QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
+QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
+QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
+#endif
+
+/* "size" getter: hands a copy of the configured size to the visitor. */
+static void
+host_memory_backend_get_size(Object *obj, Visitor *v, void *opaque,
+                             const char *name, Error **errp)
+{
+    uint64_t size = MEMORY_BACKEND(obj)->size;
+
+    visit_type_size(v, &size, name, errp);
+}
+
+/*
+ * "size" setter.
+ *
+ * Fails if the backing region already has a non-zero size, if the visitor
+ * reports an error, or if the parsed value is zero; otherwise stores the
+ * new size.  Any error is forwarded to @errp.
+ */
+static void
+host_memory_backend_set_size(Object *obj, Visitor *v, void *opaque,
+                             const char *name, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    Error *err = NULL;
+    uint64_t new_size;
+
+    if (memory_region_size(&backend->mr)) {
+        error_setg(&err, "cannot change property value");
+    } else {
+        visit_type_size(v, &new_size, name, &err);
+        if (!err) {
+            if (new_size) {
+                backend->size = new_size;
+            } else {
+                error_setg(&err, "Property '%s.%s' doesn't take value '%"
+                           PRIu64 "'", object_get_typename(obj), name,
+                           new_size);
+            }
+        }
+    }
+
+    error_propagate(errp, err);
+}
+
+/*
+ * "host-nodes" getter.
+ *
+ * Builds a temporary uint16List holding the index of every bit set in
+ * backend->host_nodes (in ascending order), passes it to the visitor and
+ * releases it again.  When no bit is set nothing is visited.
+ */
+static void
+host_memory_backend_get_host_nodes(Object *obj, Visitor *v, void *opaque,
+                                   const char *name, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    uint16List *host_nodes = NULL;
+    uint16List **tail = &host_nodes;
+    unsigned long bit;
+
+    /* find_first_bit()/find_next_bit() return MAX_NODES when exhausted */
+    for (bit = find_first_bit(backend->host_nodes, MAX_NODES);
+         bit < MAX_NODES;
+         bit = find_next_bit(backend->host_nodes, MAX_NODES, bit + 1)) {
+        *tail = g_malloc0(sizeof(**tail));
+        (*tail)->value = bit;
+        tail = &(*tail)->next;
+    }
+
+    if (!host_nodes) {
+        return;
+    }
+
+    visit_type_uint16List(v, &host_nodes, name, errp);
+    /* the list only exists to feed the visitor; free it so each read of
+     * the property does not leak one node per configured host node */
+    qapi_free_uint16List(host_nodes);
+}
+
+/*
+ * "host-nodes" setter.
+ *
+ * With CONFIG_NUMA: parses a list of node numbers from the visitor and
+ * sets the matching bits in backend->host_nodes.  A visitor error or a
+ * node number outside [0, MAX_NODES) is reported via @errp and leaves the
+ * bitmap untouched.  Without CONFIG_NUMA the property cannot be set.
+ */
+static void
+host_memory_backend_set_host_nodes(Object *obj, Visitor *v, void *opaque,
+                                   const char *name, Error **errp)
+{
+#ifdef CONFIG_NUMA
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    Error *local_err = NULL;
+    uint16List *host_nodes = NULL;
+    uint16List *l;
+
+    visit_type_uint16List(v, &host_nodes, name, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto out;
+    }
+
+    /* Validate the whole list first: a uint16 can exceed the bitmap size,
+     * and bitmap_set() would then write out of bounds. */
+    for (l = host_nodes; l; l = l->next) {
+        if (l->value >= MAX_NODES) {
+            error_setg(errp, "Invalid host-nodes value: %d", l->value);
+            goto out;
+        }
+    }
+
+    for (l = host_nodes; l; l = l->next) {
+        bitmap_set(backend->host_nodes, l->value, 1);
+    }
+
+out:
+    /* keep the list head around so the parsed list can be released */
+    qapi_free_uint16List(host_nodes);
+#else
+    error_setg(errp, "NUMA node binding are not supported by this QEMU");
+#endif
+}
+
+/* "policy" getter: exposes the stored policy as a HostMemPolicy enum. */
+static void
+host_memory_backend_get_policy(Object *obj, Visitor *v, void *opaque,
+                               const char *name, Error **errp)
+{
+    int current = MEMORY_BACKEND(obj)->policy;
+
+    visit_type_enum(v, &current, HostMemPolicy_lookup, NULL, name, errp);
+}
+
+/*
+ * "policy" setter.
+ *
+ * Parses a HostMemPolicy enum value and stores it.  The stored policy is
+ * left unchanged when the visitor fails or, in builds without
+ * CONFIG_NUMA, when anything other than the default policy is requested;
+ * both cases are reported via @errp.
+ */
+static void
+host_memory_backend_set_policy(Object *obj, Visitor *v, void *opaque,
+                               const char *name, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    Error *local_err = NULL;
+    int policy;
+
+    visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, &local_err);
+    if (local_err) {
+        /* 'policy' was not written; do not store an indeterminate value */
+        error_propagate(errp, local_err);
+        return;
+    }
+
+#ifndef CONFIG_NUMA
+    if (policy != HOST_MEM_POLICY_DEFAULT) {
+        error_setg(errp, "NUMA policies are not supported by this QEMU");
+        return;
+    }
+#endif
+
+    backend->policy = policy;
+}
+
+/* "merge" getter: reports the stored flag. */
+static bool host_memory_backend_get_merge(Object *obj, Error **errp)
+{
+    return MEMORY_BACKEND(obj)->merge;
+}
+
+/*
+ * "merge" setter.  Before the region exists the flag is only recorded;
+ * afterwards a change of value is also applied to the RAM with
+ * qemu_madvise() (MERGEABLE / UNMERGEABLE).
+ */
+static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+
+    if (memory_region_size(&backend->mr) && value != backend->merge) {
+        void *ram = memory_region_get_ram_ptr(&backend->mr);
+        uint64_t len = memory_region_size(&backend->mr);
+        int advice = value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE;
+
+        qemu_madvise(ram, len, advice);
+    }
+
+    /* a plain store covers both the "not allocated" and "changed" cases
+     * and is a no-op when the value is unchanged */
+    backend->merge = value;
+}
+
+/* "dump" getter: reports the stored flag. */
+static bool host_memory_backend_get_dump(Object *obj, Error **errp)
+{
+    return MEMORY_BACKEND(obj)->dump;
+}
+
+/*
+ * "dump" setter.  Before the region exists the flag is only recorded;
+ * afterwards a change of value is also applied to the RAM with
+ * qemu_madvise() (DODUMP / DONTDUMP).
+ */
+static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+
+    if (memory_region_size(&backend->mr) && value != backend->dump) {
+        void *ram = memory_region_get_ram_ptr(&backend->mr);
+        uint64_t len = memory_region_size(&backend->mr);
+        int advice = value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP;
+
+        qemu_madvise(ram, len, advice);
+    }
+
+    /* a plain store covers both the "not allocated" and "changed" cases
+     * and is a no-op when the value is unchanged */
+    backend->dump = value;
+}
+
+/* "prealloc" getter: true when preallocation was requested either through
+ * the property or through the forced setting. */
+static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+
+    if (backend->prealloc) {
+        return true;
+    }
+    return backend->force_prealloc;
+}
+
+/*
+ * "prealloc" setter.
+ *
+ * Enabling the property is refused while force_prealloc is set.  Before
+ * the region exists the flag is only recorded.  Once it exists, switching
+ * the flag on preallocates the RAM immediately; switching it off has no
+ * effect.
+ */
+static void host_memory_backend_set_prealloc(Object *obj, bool value,
+                                             Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    int fd;
+    void *ram;
+    uint64_t len;
+
+    if (backend->force_prealloc && value) {
+        error_setg(errp,
+                   "remove -mem-prealloc to use the prealloc property");
+        return;
+    }
+
+    if (!memory_region_size(&backend->mr)) {
+        backend->prealloc = value;
+        return;
+    }
+
+    if (!value || backend->prealloc) {
+        return;
+    }
+
+    fd = memory_region_get_fd(&backend->mr);
+    ram = memory_region_get_ram_ptr(&backend->mr);
+    len = memory_region_size(&backend->mr);
+
+    os_mem_prealloc(fd, ram, len);
+    backend->prealloc = true;
+}
+
+/*
+ * Instance initializer.
+ *
+ * Seeds the merge and dump flags from the machine options "mem-merge" and
+ * "dump-guest-core" (each defaulting to true when absent), takes the
+ * initial prealloc flag from mem_prealloc, and registers the "merge",
+ * "dump", "prealloc", "size", "host-nodes" and "policy" properties.
+ */
+static void host_memory_backend_init(Object *obj)
+{
+ HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+
+ backend->merge = qemu_opt_get_bool(qemu_get_machine_opts(),
+ "mem-merge", true);
+ backend->dump = qemu_opt_get_bool(qemu_get_machine_opts(),
+ "dump-guest-core", true);
+ backend->prealloc = mem_prealloc;
+
+ object_property_add_bool(obj, "merge",
+ host_memory_backend_get_merge,
+ host_memory_backend_set_merge, NULL);
+ object_property_add_bool(obj, "dump",
+ host_memory_backend_get_dump,
+ host_memory_backend_set_dump, NULL);
+ object_property_add_bool(obj, "prealloc",
+ host_memory_backend_get_prealloc,
+ host_memory_backend_set_prealloc, NULL);
+ object_property_add(obj, "size", "int",
+ host_memory_backend_get_size,
+ host_memory_backend_set_size, NULL, NULL, NULL);
+ /* NOTE(review): "host-nodes" is visited as a uint16 list but is declared
+  * with type string "int" here - confirm this is intended */
+ object_property_add(obj, "host-nodes", "int",
+ host_memory_backend_get_host_nodes,
+ host_memory_backend_set_host_nodes, NULL, NULL, NULL);
+ object_property_add(obj, "policy", "str",
+ host_memory_backend_get_policy,
+ host_memory_backend_set_policy, NULL, NULL, NULL);
+}
+
+/* Instance finalizer: destroys the memory region if one has been set up
+ * (i.e. it has a non-zero size). */
+static void host_memory_backend_finalize(Object *obj)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+
+    if (!memory_region_size(&backend->mr)) {
+        return;
+    }
+
+    memory_region_destroy(&backend->mr);
+}
+
+/*
+ * Returns the backend's memory region, or NULL while the region still has
+ * size zero.  @errp is not used by this implementation.
+ */
+MemoryRegion *
+host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
+{
+    if (memory_region_size(&backend->mr)) {
+        return &backend->mr;
+    }
+
+    return NULL;
+}
+
+/*
+ * UserCreatable "complete" hook.
+ *
+ * If the class provides an alloc hook, calls it and then configures the
+ * resulting RAM: applies the merge/dump madvise hints, binds the memory
+ * with mbind() when built with CONFIG_NUMA, and finally preallocates it if
+ * requested.  A failure of the alloc hook, of the host-nodes/policy
+ * consistency check, or of mbind() is reported through @errp and ends
+ * processing at that point.  Nothing is done when the class has no alloc
+ * hook.
+ */
+static void
+host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
+{
+ HostMemoryBackend *backend = MEMORY_BACKEND(uc);
+ HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
+ Error *local_err = NULL;
+ void *ptr;
+ uint64_t sz;
+
+ if (bc->alloc) {
+ bc->alloc(backend, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ ptr = memory_region_get_ram_ptr(&backend->mr);
+ sz = memory_region_size(&backend->mr);
+
+ /* only the non-default hints are applied here: merge on, dump off */
+ if (backend->merge) {
+ qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
+ }
+ if (!backend->dump) {
+ qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
+ }
+#ifdef CONFIG_NUMA
+ unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
+ /* lastbit == MAX_NODES means maxnode = 0 */
+ unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
+ /* ensure policy won't be ignored in case memory is preallocated
+ * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
+ * this doesn't catch hugepage case. */
+ unsigned flags = MPOL_MF_STRICT;
+
+ /* check for invalid host-nodes and policies and give more verbose
+ * error messages than mbind(). */
+ if (maxnode && backend->policy == MPOL_DEFAULT) {
+ error_setg(errp, "host-nodes must be empty for policy default,"
+ " or you should explicitly specify a policy other"
+ " than default");
+ return;
+ } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
+ error_setg(errp, "host-nodes must be set for policy %s",
+ HostMemPolicy_lookup[backend->policy]);
+ return;
+ }
+
+ /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
+ * as argument to mbind() due to an old Linux bug (feature?) which
+ * cuts off the last specified node. This means backend->host_nodes
+ * must have MAX_NODES+1 bits available.
+ */
+ assert(sizeof(backend->host_nodes) >=
+ BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
+ assert(maxnode <= MAX_NODES);
+ /* a NULL node mask is passed when no host node is configured */
+ if (mbind(ptr, sz, backend->policy,
+ maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
+ error_setg_errno(errp, errno,
+ "cannot bind memory to host NUMA nodes");
+ return;
+ }
+#endif
+ /* Preallocate memory after the NUMA policy has been instantiated.
+ * This is necessary to guarantee memory is allocated with
+ * specified NUMA policy in place.
+ */
+ if (backend->prealloc) {
+ os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz);
+ }
+ }
+}
+
+/* Class initializer: routes the UserCreatable "complete" callback to
+ * host_memory_backend_memory_complete(). */
+static void
+host_memory_backend_class_init(ObjectClass *oc, void *data)
+{
+    USER_CREATABLE_CLASS(oc)->complete = host_memory_backend_memory_complete;
+}
+
+/* Abstract base type for host memory backends; implements the
+ * user-creatable interface. */
+static const TypeInfo host_memory_backend_info = {
+    .name              = TYPE_MEMORY_BACKEND,
+    .parent            = TYPE_OBJECT,
+    .abstract          = true,
+    .instance_size     = sizeof(HostMemoryBackend),
+    .instance_init     = host_memory_backend_init,
+    .instance_finalize = host_memory_backend_finalize,
+    .class_size        = sizeof(HostMemoryBackendClass),
+    .class_init        = host_memory_backend_class_init,
+    .interfaces        = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+/* Registers the abstract memory backend type; hooked up through
+ * type_init() below. */
+static void register_types(void)
+{
+ type_register_static(&host_memory_backend_info);
+}
+
+type_init(register_types);
diff --git a/configure b/configure
index a69e90b570..05003b7743 100755
--- a/configure
+++ b/configure
@@ -334,6 +334,7 @@ tpm="no"
libssh2=""
vhdx=""
quorum=""
+numa=""
# parse CC options first
for opt do
@@ -1118,6 +1119,10 @@ for opt do
;;
--enable-quorum) quorum="yes"
;;
+ --disable-numa) numa="no"
+ ;;
+ --enable-numa) numa="yes"
+ ;;
*)
echo "ERROR: unknown option $opt"
echo "Try '$0 --help' for more information"
@@ -1384,6 +1389,8 @@ Advanced options (experts only):
--enable-vhdx enable support for the Microsoft VHDX image format
--disable-quorum disable quorum block filter support
--enable-quorum enable quorum block filter support
+ --disable-numa disable libnuma support
+ --enable-numa enable libnuma support
NOTE: The object files are built at the place where configure is launched
EOF
@@ -3168,6 +3175,26 @@ if compile_prog "" "" ; then
fi
##########################################
+# libnuma probe
+
+if test "$numa" != "no" ; then
+ cat > $TMPC << EOF
+#include <numa.h>
+int main(void) { return numa_available(); }
+EOF
+
+ if compile_prog "" "-lnuma" ; then
+ numa=yes
+ libs_softmmu="-lnuma $libs_softmmu"
+ else
+ if test "$numa" = "yes" ; then
+ feature_not_found "numa" "install numactl devel"
+ fi
+ numa=no
+ fi
+fi
+
+##########################################
# signalfd probe
signalfd="no"
cat > $TMPC << EOF
@@ -4211,6 +4238,7 @@ echo "vhdx $vhdx"
echo "Quorum $quorum"
echo "lzo support $lzo"
echo "snappy support $snappy"
+echo "NUMA host support $numa"
if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -4485,6 +4513,9 @@ fi
if test "$vhost_scsi" = "yes" ; then
echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak
fi
+if test "$vhost_net" = "yes" ; then
+ echo "CONFIG_VHOST_NET_USED=y" >> $config_host_mak
+fi
if test "$blobs" = "yes" ; then
echo "INSTALL_BLOBS=yes" >> $config_host_mak
fi
@@ -5174,6 +5205,10 @@ if [ "$dtc_internal" = "yes" ]; then
echo "config-host.h: subdir-dtc" >> $config_host_mak
fi
+if test "$numa" = "yes"; then
+ echo "CONFIG_NUMA=y" >> $config_host_mak
+fi
+
# build tree in object directory in case the source is not in the current directory
DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests"
DIRS="$DIRS fsdev"
diff --git a/cpus.c b/cpus.c
index af06dc0ae6..1ec3a9edd4 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1312,20 +1312,6 @@ static void tcg_exec_all(void)
exit_request = 0;
}
-void set_numa_modes(void)
-{
- CPUState *cpu;
- int i;
-
- CPU_FOREACH(cpu) {
- for (i = 0; i < nb_numa_nodes; i++) {
- if (test_bit(cpu->cpu_index, node_cpumask[i])) {
- cpu->numa_node = i;
- }
- }
- }
-}
-
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
/* XXX: implement xxx_cpu_list for targets that still miss it */
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 37ef90f585..8e08841760 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -44,3 +44,4 @@ CONFIG_APIC=y
CONFIG_IOAPIC=y
CONFIG_ICC_BUS=y
CONFIG_PVPANIC=y
+CONFIG_MEM_HOTPLUG=y
diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak
index 31bddce4f4..66557ac590 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -44,3 +44,4 @@ CONFIG_APIC=y
CONFIG_IOAPIC=y
CONFIG_ICC_BUS=y
CONFIG_PVPANIC=y
+CONFIG_MEM_HOTPLUG=y
diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt
index 145402e078..019db53ec8 100644
--- a/docs/qmp/qmp-events.txt
+++ b/docs/qmp/qmp-events.txt
@@ -1,6 +1,16 @@
QEMU Machine Protocol Events
============================
+ACPI_DEVICE_OST
+---------------
+
+Emitted when guest executes ACPI _OST method.
+
+ - data: ACPIOSTInfo type as described in qapi-schema.json
+
+{ "event": "ACPI_DEVICE_OST",
+ "data": { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0 } }
+
BALLOON_CHANGE
--------------
diff --git a/docs/specs/acpi_mem_hotplug.txt b/docs/specs/acpi_mem_hotplug.txt
new file mode 100644
index 0000000000..12909940cc
--- /dev/null
+++ b/docs/specs/acpi_mem_hotplug.txt
@@ -0,0 +1,44 @@
+QEMU<->ACPI BIOS memory hotplug interface
+--------------------------------------
+
+The ACPI BIOS GPE.3 handler is dedicated to notifying the OS about memory
+hot-add events.
+
+Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access):
+---------------------------------------------------------------
+0xa00:
+ read access:
+ [0x0-0x3] Lo part of memory device phys address
+ [0x4-0x7] Hi part of memory device phys address
+ [0x8-0xb] Lo part of memory device size in bytes
+ [0xc-0xf] Hi part of memory device size in bytes
+ [0x10-0x13] Memory device proximity domain
+ [0x14] Memory device status fields
+ bits:
+ 0: Device is enabled and may be used by guest
+ 1: Device insert event, used to distinguish device for which
+ no device check event to OSPM was issued.
+ It's valid only when bit 1 is set.
+ 2-7: reserved and should be ignored by OSPM
+ [0x15-0x17] reserved
+
+ write access:
+ [0x0-0x3] Memory device slot selector, selects active memory device.
+ All following accesses to other registers in 0xa00-0xa17
+ region will read/store data from/to selected memory device.
+ [0x4-0x7] OST event code reported by OSPM
+ [0x8-0xb] OST status code reported by OSPM
+ [0xc-0x13] reserved, writes into it are ignored
+ [0x14] Memory device control fields
+ bits:
+ 0: reserved, OSPM must clear it before writing to register
+ 1: if set to 1 clears device insert event, set by OSPM
+ after it has emitted device check event for the
+ selected memory device
+ 2-7: reserved, OSPM must clear them before writing to register
+
+Selecting memory device slot beyond present range has no effect on platform:
+ - write accesses to memory hot-plug registers not documented above are
+ ignored
+ - read accesses to memory hot-plug registers not documented above return
+ all bits set to 1.
diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
new file mode 100644
index 0000000000..0ea767e4b8
--- /dev/null
+++ b/docs/specs/vhost-user.txt
@@ -0,0 +1,266 @@
+Vhost-user Protocol
+===================
+
+Copyright (c) 2014 Virtual Open Systems Sarl.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.
+See the COPYING file in the top-level directory.
+===================
+
+This protocol aims to complement the ioctl interface used to control the
+vhost implementation in the Linux kernel. It implements the control plane needed
+to establish virtqueue sharing with a user space process on the same host. It
+uses communication over a Unix domain socket to share file descriptors in the
+ancillary data of the message.
+
+The protocol defines 2 sides of the communication, master and slave. Master is
+the application that shares its virtqueues, in our case QEMU. Slave is the
+consumer of the virtqueues.
+
+In the current implementation QEMU is the Master, and the Slave is intended to
+be a software Ethernet switch running in user space, such as Snabbswitch.
+
+Master and slave can be either a client (i.e. connecting) or server (listening)
+in the socket communication.
+
+Message Specification
+---------------------
+
+Note that all numbers are in the machine native byte order. A vhost-user message
+consists of 3 header fields and a payload:
+
+------------------------------------
+| request | flags | size | payload |
+------------------------------------
+
+ * Request: 32-bit type of the request
+ * Flags: 32-bit bit field:
+ - Lower 2 bits are the version (currently 0x01)
+ - Bit 2 is the reply flag - needs to be sent on each reply from the slave
+ * Size - 32-bit size of the payload
+
+
+Depending on the request type, payload can be:
+
+ * A single 64-bit integer
+ -------
+ | u64 |
+ -------
+
+ u64: a 64-bit unsigned integer
+
+ * A vring state description
+ ---------------
+ | index | num |
+ ---------------
+
+ Index: a 32-bit index
+ Num: a 32-bit number
+
+ * A vring address description
+ --------------------------------------------------------------
+ | index | flags | size | descriptor | used | available | log |
+ --------------------------------------------------------------
+
+ Index: a 32-bit vring index
+ Flags: a 32-bit vring flags
+ Descriptor: a 64-bit user address of the vring descriptor table
+ Used: a 64-bit user address of the vring used ring
+ Available: a 64-bit user address of the vring available ring
+ Log: a 64-bit guest address for logging
+
+ * Memory regions description
+ ---------------------------------------------------
+ | num regions | padding | region0 | ... | region7 |
+ ---------------------------------------------------
+
+ Num regions: a 32-bit number of regions
+ Padding: 32-bit
+
+ A region is:
+ ---------------------------------------
+ | guest address | size | user address |
+ ---------------------------------------
+
+ Guest address: a 64-bit guest address of the region
+ Size: a 64-bit size
+ User address: a 64-bit user address
+
+
+In QEMU the vhost-user message is implemented with the following struct:
+
+typedef struct VhostUserMsg {
+ VhostUserRequest request;
+ uint32_t flags;
+ uint32_t size;
+ union {
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ VhostUserMemory memory;
+ };
+} QEMU_PACKED VhostUserMsg;
+
+Communication
+-------------
+
+The protocol for vhost-user is based on the existing implementation of vhost
+for the Linux Kernel. Most messages that can be sent via the Unix domain socket
+implementing vhost-user have an equivalent ioctl to the kernel implementation.
+
+The communication consists of master sending message requests and slave sending
+message replies. Most of the requests don't require replies. Here is a list of
+the ones that do:
+
+ * VHOST_USER_GET_FEATURES
+ * VHOST_USER_GET_VRING_BASE
+
+There are several messages that the master sends with file descriptors passed
+in the ancillary data:
+
+ * VHOST_USER_SET_MEM_TABLE
+ * VHOST_USER_SET_LOG_FD
+ * VHOST_USER_SET_VRING_KICK
+ * VHOST_USER_SET_VRING_CALL
+ * VHOST_USER_SET_VRING_ERR
+
+If Master is unable to send the full message or receives a wrong reply it will
+close the connection. An optional reconnection mechanism can be implemented.
+
+Message types
+-------------
+
+ * VHOST_USER_GET_FEATURES
+
+ Id: 2
+ Equivalent ioctl: VHOST_GET_FEATURES
+ Master payload: N/A
+ Slave payload: u64
+
+ Get the features bitmask from the underlying vhost implementation.
+
+ * VHOST_USER_SET_FEATURES
+
+ Id: 3
+ Equivalent ioctl: VHOST_SET_FEATURES
+ Master payload: u64
+
+ Enable features in the underlying vhost implementation using a bitmask.
+
+ * VHOST_USER_SET_OWNER
+
+ Id: 4
+ Equivalent ioctl: VHOST_SET_OWNER
+ Master payload: N/A
+
+ Issued when a new connection is established. It sets the current Master
+ as an owner of the session. This can be used on the Slave as a
+ "session start" flag.
+
+ * VHOST_USER_RESET_OWNER
+
+ Id: 5
+ Equivalent ioctl: VHOST_RESET_OWNER
+ Master payload: N/A
+
+ Issued when the connection is about to be closed. The Master will no
+ longer own this connection (and will usually close it).
+
+ * VHOST_USER_SET_MEM_TABLE
+
+ Id: 6
+ Equivalent ioctl: VHOST_SET_MEM_TABLE
+ Master payload: memory regions description
+
+ Sets the memory map regions on the slave so it can translate the vring
+ addresses. In the ancillary data there is an array of file descriptors
+ for each memory mapped region. The size and ordering of the fds matches
+ the number and ordering of memory regions.
+
+ * VHOST_USER_SET_LOG_BASE
+
+ Id: 7
+ Equivalent ioctl: VHOST_SET_LOG_BASE
+ Master payload: u64
+
+ Sets the logging base address.
+
+ * VHOST_USER_SET_LOG_FD
+
+ Id: 8
+ Equivalent ioctl: VHOST_SET_LOG_FD
+ Master payload: N/A
+
+ Sets the logging file descriptor, which is passed as ancillary data.
+
+ * VHOST_USER_SET_VRING_NUM
+
+ Id: 9
+ Equivalent ioctl: VHOST_SET_VRING_NUM
+ Master payload: vring state description
+
+ Sets the size of the vring, i.e. the number of descriptors in it.
+
+ * VHOST_USER_SET_VRING_ADDR
+
+ Id: 10
+ Equivalent ioctl: VHOST_SET_VRING_ADDR
+ Master payload: vring address description
+ Slave payload: N/A
+
+ Sets the addresses of the different aspects of the vring.
+
+ * VHOST_USER_SET_VRING_BASE
+
+ Id: 11
+ Equivalent ioctl: VHOST_SET_VRING_BASE
+ Master payload: vring state description
+
+ Sets the base offset in the available vring.
+
+ * VHOST_USER_GET_VRING_BASE
+
+ Id: 12
+ Equivalent ioctl: VHOST_GET_VRING_BASE
+ Master payload: vring state description
+ Slave payload: vring state description
+
+ Get the available vring base offset.
+
+ * VHOST_USER_SET_VRING_KICK
+
+ Id: 13
+ Equivalent ioctl: VHOST_SET_VRING_KICK
+ Master payload: u64
+
+ Set the event file descriptor for adding buffers to the vring. It
+ is passed in the ancillary data.
+ Bits (0-7) of the payload contain the vring index. Bit 8 is the
+ invalid FD flag. This flag is set when there is no file descriptor
+ in the ancillary data. This signals that polling should be used
+ instead of waiting for a kick.
+
+ * VHOST_USER_SET_VRING_CALL
+
+ Id: 14
+ Equivalent ioctl: VHOST_SET_VRING_CALL
+ Master payload: u64
+
+ Set the event file descriptor to signal when buffers are used. It
+ is passed in the ancillary data.
+ Bits (0-7) of the payload contain the vring index. Bit 8 is the
+ invalid FD flag. This flag is set when there is no file descriptor
+ in the ancillary data. This signals that polling will be used
+ instead of waiting for the call.
+
+ * VHOST_USER_SET_VRING_ERR
+
+ Id: 15
+ Equivalent ioctl: VHOST_SET_VRING_ERR
+ Master payload: u64
+
+ Set the event file descriptor to signal when an error occurs. It
+ is passed in the ancillary data.
+ Bits (0-7) of the payload contain the vring index. Bit 8 is the
+ invalid FD flag. This flag is set when there is no file descriptor
+ in the ancillary data.
diff --git a/exec.c b/exec.c
index c3fbbb3fb8..1ca7baca0b 100644
--- a/exec.c
+++ b/exec.c
@@ -70,6 +70,12 @@ AddressSpace address_space_memory;
MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;
+/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
+#define RAM_PREALLOC (1 << 0)
+
+/* RAM is mmap-ed with MAP_SHARED */
+#define RAM_SHARED (1 << 1)
+
#endif
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
@@ -1011,16 +1017,10 @@ static long gethugepagesize(const char *path)
return fs.f_bsize;
}
-static sigjmp_buf sigjump;
-
-static void sigbus_handler(int signal)
-{
- siglongjmp(sigjump, 1);
-}
-
static void *file_ram_alloc(RAMBlock *block,
ram_addr_t memory,
- const char *path)
+ const char *path,
+ Error **errp)
{
char *filename;
char *sanitized_name;
@@ -1039,7 +1039,8 @@ static void *file_ram_alloc(RAMBlock *block,
}
if (kvm_enabled() && !kvm_has_sync_mmu()) {
- fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
+ error_setg(errp,
+ "host lacks kvm mmu notifiers, -mem-path unsupported");
goto error;
}
@@ -1056,7 +1057,8 @@ static void *file_ram_alloc(RAMBlock *block,
fd = mkstemp(filename);
if (fd < 0) {
- perror("unable to create backing store for hugepages");
+ error_setg_errno(errp, errno,
+ "unable to create backing store for hugepages");
g_free(filename);
goto error;
}
@@ -1071,53 +1073,22 @@ static void *file_ram_alloc(RAMBlock *block,
* If anything goes wrong with it under other filesystems,
* mmap will fail.
*/
- if (ftruncate(fd, memory))
+ if (ftruncate(fd, memory)) {
perror("ftruncate");
+ }
- area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ area = mmap(0, memory, PROT_READ | PROT_WRITE,
+ (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
+ fd, 0);
if (area == MAP_FAILED) {
- perror("file_ram_alloc: can't mmap RAM pages");
+ error_setg_errno(errp, errno,
+ "unable to map backing store for hugepages");
close(fd);
goto error;
}
if (mem_prealloc) {
- int ret, i;
- struct sigaction act, oldact;
- sigset_t set, oldset;
-
- memset(&act, 0, sizeof(act));
- act.sa_handler = &sigbus_handler;
- act.sa_flags = 0;
-
- ret = sigaction(SIGBUS, &act, &oldact);
- if (ret) {
- perror("file_ram_alloc: failed to install signal handler");
- exit(1);
- }
-
- /* unblock SIGBUS */
- sigemptyset(&set);
- sigaddset(&set, SIGBUS);
- pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
-
- if (sigsetjmp(sigjump, 1)) {
- fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
- exit(1);
- }
-
- /* MAP_POPULATE silently ignores failures */
- for (i = 0; i < (memory/hpagesize); i++) {
- memset(area + (hpagesize*i), 0, 1);
- }
-
- ret = sigaction(SIGBUS, &oldact, NULL);
- if (ret) {
- perror("file_ram_alloc: failed to reinstall signal handler");
- exit(1);
- }
-
- pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+ os_mem_prealloc(fd, area, memory);
}
block->fd = fd;
@@ -1129,14 +1100,6 @@ error:
}
return NULL;
}
-#else
-static void *file_ram_alloc(RAMBlock *block,
- ram_addr_t memory,
- const char *path)
-{
- fprintf(stderr, "-mem-path not supported on this host\n");
- exit(1);
-}
#endif
static ram_addr_t find_ram_offset(ram_addr_t size)
@@ -1262,56 +1225,30 @@ static int memory_try_enable_merging(void *addr, size_t len)
return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}
-ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
- MemoryRegion *mr)
+static ram_addr_t ram_block_add(RAMBlock *new_block)
{
- RAMBlock *block, *new_block;
+ RAMBlock *block;
ram_addr_t old_ram_size, new_ram_size;
old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
- size = TARGET_PAGE_ALIGN(size);
- new_block = g_malloc0(sizeof(*new_block));
- new_block->fd = -1;
-
/* This assumes the iothread lock is taken here too. */
qemu_mutex_lock_ramlist();
- new_block->mr = mr;
- new_block->offset = find_ram_offset(size);
- if (host) {
- new_block->host = host;
- new_block->flags |= RAM_PREALLOC_MASK;
- } else if (xen_enabled()) {
- if (mem_path) {
- fprintf(stderr, "-mem-path not supported with Xen\n");
- exit(1);
- }
- xen_ram_alloc(new_block->offset, size, mr);
- } else {
- if (mem_path) {
- if (phys_mem_alloc != qemu_anon_ram_alloc) {
- /*
- * file_ram_alloc() needs to allocate just like
- * phys_mem_alloc, but we haven't bothered to provide
- * a hook there.
- */
- fprintf(stderr,
- "-mem-path not supported with this accelerator\n");
- exit(1);
- }
- new_block->host = file_ram_alloc(new_block, size, mem_path);
- }
- if (!new_block->host) {
- new_block->host = phys_mem_alloc(size);
+ new_block->offset = find_ram_offset(new_block->length);
+
+ if (!new_block->host) {
+ if (xen_enabled()) {
+ xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
+ } else {
+ new_block->host = phys_mem_alloc(new_block->length);
if (!new_block->host) {
fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
new_block->mr->name, strerror(errno));
exit(1);
}
- memory_try_enable_merging(new_block->host, size);
+ memory_try_enable_merging(new_block->host, new_block->length);
}
}
- new_block->length = size;
/* Keep the list sorted from biggest to smallest block. */
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
@@ -1339,18 +1276,75 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
old_ram_size, new_ram_size);
}
}
- cpu_physical_memory_set_dirty_range(new_block->offset, size);
+ cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
- qemu_ram_setup_dump(new_block->host, size);
- qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
- qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
+ qemu_ram_setup_dump(new_block->host, new_block->length);
+ qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
+ qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
- if (kvm_enabled())
- kvm_setup_guest_memory(new_block->host, size);
+ if (kvm_enabled()) {
+ kvm_setup_guest_memory(new_block->host, new_block->length);
+ }
return new_block->offset;
}
+#ifdef __linux__
+ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
+ bool share, const char *mem_path,
+ Error **errp)
+{
+ RAMBlock *new_block;
+
+ if (xen_enabled()) {
+ error_setg(errp, "-mem-path not supported with Xen");
+ return -1;
+ }
+
+ if (phys_mem_alloc != qemu_anon_ram_alloc) {
+ /*
+ * file_ram_alloc() needs to allocate just like
+ * phys_mem_alloc, but we haven't bothered to provide
+ * a hook there.
+ */
+ error_setg(errp,
+ "-mem-path not supported with this accelerator");
+ return -1;
+ }
+
+ size = TARGET_PAGE_ALIGN(size);
+ new_block = g_malloc0(sizeof(*new_block));
+ new_block->mr = mr;
+ new_block->length = size;
+ new_block->flags = share ? RAM_SHARED : 0;
+ new_block->host = file_ram_alloc(new_block, size,
+ mem_path, errp);
+ if (!new_block->host) {
+ g_free(new_block);
+ return -1;
+ }
+
+ return ram_block_add(new_block);
+}
+#endif
+
+ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
+ MemoryRegion *mr)
+{
+ RAMBlock *new_block;
+
+ size = TARGET_PAGE_ALIGN(size);
+ new_block = g_malloc0(sizeof(*new_block));
+ new_block->mr = mr;
+ new_block->length = size;
+ new_block->fd = -1;
+ new_block->host = host;
+ if (host) {
+ new_block->flags |= RAM_PREALLOC;
+ }
+ return ram_block_add(new_block);
+}
+
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
{
return qemu_ram_alloc_from_ptr(size, NULL, mr);
@@ -1385,7 +1379,7 @@ void qemu_ram_free(ram_addr_t addr)
QTAILQ_REMOVE(&ram_list.blocks, block, next);
ram_list.mru_block = NULL;
ram_list.version++;
- if (block->flags & RAM_PREALLOC_MASK) {
+ if (block->flags & RAM_PREALLOC) {
;
} else if (xen_enabled()) {
xen_invalidate_map_cache_entry(block->host);
@@ -1417,7 +1411,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
offset = addr - block->offset;
if (offset < block->length) {
vaddr = block->host + offset;
- if (block->flags & RAM_PREALLOC_MASK) {
+ if (block->flags & RAM_PREALLOC) {
;
} else if (xen_enabled()) {
abort();
@@ -1425,12 +1419,8 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
flags = MAP_FIXED;
munmap(vaddr, length);
if (block->fd >= 0) {
-#ifdef MAP_POPULATE
- flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
- MAP_PRIVATE;
-#else
- flags |= MAP_PRIVATE;
-#endif
+ flags |= (block->flags & RAM_SHARED ?
+ MAP_SHARED : MAP_PRIVATE);
area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
flags, block->fd, offset);
} else {
@@ -1460,6 +1450,13 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
}
#endif /* !_WIN32 */
+int qemu_get_ram_fd(ram_addr_t addr)
+{
+ RAMBlock *block = qemu_get_ram_block(addr);
+
+ return block->fd;
+}
+
/* Return a host pointer to ram allocated with qemu_ram_alloc.
With the exception of the softmmu code in this file, this should
only be used for local memory (e.g. video ram) that the device owns,
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 5f1a677b85..d0943b1ff3 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1211,7 +1211,7 @@ ETEXI
{
.name = "host_net_add",
.args_type = "device:s,opts:s?",
- .params = "tap|user|socket|vde|netmap|bridge|dump [options]",
+ .params = "tap|user|socket|vde|netmap|bridge|vhost-user|dump [options]",
.help = "add host VLAN client",
.mhandler.cmd = net_host_device_add,
.command_completion = host_net_add_completion,
@@ -1241,7 +1241,7 @@ ETEXI
{
.name = "netdev_add",
.args_type = "netdev:O",
- .params = "[user|tap|socket|vde|bridge|hubport|netmap],id=str[,prop=value][,...]",
+ .params = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]",
.help = "add host network device",
.mhandler.cmd = hmp_netdev_add,
.command_completion = netdev_add_completion,
diff --git a/hmp.c b/hmp.c
index ccc35d41a1..41006f5eef 100644
--- a/hmp.c
+++ b/hmp.c
@@ -22,6 +22,8 @@
#include "qemu/sockets.h"
#include "monitor/monitor.h"
#include "qapi/opts-visitor.h"
+#include "qapi/string-output-visitor.h"
+#include "qapi-visit.h"
#include "ui/console.h"
#include "block/qapi.h"
#include "qemu-io.h"
@@ -1676,3 +1678,37 @@ void hmp_object_del(Monitor *mon, const QDict *qdict)
qmp_object_del(id, &err);
hmp_handle_error(mon, &err);
}
+
+/*
+ * HMP handler that prints every entry returned by qmp_query_memdev():
+ * one numbered "memory device" stanza per entry with its size, the
+ * merge/dump/prealloc flags, the policy name and the host node list.
+ *
+ * @mon:   monitor the output is written to
+ * @qdict: command arguments (not used by this handler)
+ */
+void hmp_info_memdev(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    MemdevList *memdev_list = qmp_query_memdev(&err);
+    MemdevList *m = memdev_list;
+    StringOutputVisitor *ov;
+    int i = 0;
+
+    while (m) {
+        /* A fresh visitor per entry renders host_nodes as a string. */
+        ov = string_output_visitor_new(false);
+        visit_type_uint16List(string_output_get_visitor(ov),
+                              &m->value->host_nodes, NULL, NULL);
+        monitor_printf(mon, "memory device %d\n", i);
+        monitor_printf(mon, " size: %" PRId64 "\n", m->value->size);
+        monitor_printf(mon, " merge: %s\n",
+                       m->value->merge ? "true" : "false");
+        monitor_printf(mon, " dump: %s\n",
+                       m->value->dump ? "true" : "false");
+        monitor_printf(mon, " prealloc: %s\n",
+                       m->value->prealloc ? "true" : "false");
+        monitor_printf(mon, " policy: %s\n",
+                       HostMemPolicy_lookup[m->value->policy]);
+        monitor_printf(mon, " host nodes: %s\n",
+                       string_output_get_string(ov));
+
+        string_output_visitor_cleanup(ov);
+        m = m->next;
+        i++;
+    }
+
+    monitor_printf(mon, "\n");
+
+    /* Release the list obtained from qmp_query_memdev(); nothing else
+     * in this function frees it. */
+    qapi_free_MemdevList(memdev_list);
+    /* Hand any error from the query to the common HMP error path, the
+     * same way hmp_object_del() does. */
+    hmp_handle_error(mon, &err);
+}
diff --git a/hmp.h b/hmp.h
index 2d9b0a2b0b..4fd3c4a901 100644
--- a/hmp.h
+++ b/hmp.h
@@ -93,6 +93,7 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict);
void hmp_cpu_add(Monitor *mon, const QDict *qdict);
void hmp_object_add(Monitor *mon, const QDict *qdict);
void hmp_object_del(Monitor *mon, const QDict *qdict);
+void hmp_info_memdev(Monitor *mon, const QDict *qdict);
void object_add_completion(ReadLineState *rs, int nb_args, const char *str);
void object_del_completion(ReadLineState *rs, int nb_args, const char *str);
void device_add_completion(ReadLineState *rs, int nb_args, const char *str);
diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index 9aa6725f09..5861a5b826 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -299,9 +299,7 @@ static int v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
free_out:
v9fs_string_free(&fidp->fs.xattr.name);
free_value:
- if (fidp->fs.xattr.value) {
- g_free(fidp->fs.xattr.value);
- }
+ g_free(fidp->fs.xattr.value);
return retval;
}
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index d178b65de4..52a1464051 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -29,6 +29,7 @@ devices-dirs-$(CONFIG_SOFTMMU) += usb/
devices-dirs-$(CONFIG_VIRTIO) += virtio/
devices-dirs-$(CONFIG_SOFTMMU) += watchdog/
devices-dirs-$(CONFIG_SOFTMMU) += xen/
+devices-dirs-$(CONFIG_MEM_HOTPLUG) += mem/
devices-dirs-y += core/
common-obj-y += $(devices-dirs-y)
obj-y += $(devices-dirs-y)
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 397d32babd..acd2389431 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -1 +1,3 @@
common-obj-$(CONFIG_ACPI) += core.o piix4.o ich9.o pcihp.o cpu_hotplug.o
+common-obj-$(CONFIG_ACPI) += memory_hotplug.o
+common-obj-$(CONFIG_ACPI) += acpi_interface.o
diff --git a/hw/acpi/acpi_interface.c b/hw/acpi/acpi_interface.c
new file mode 100644
index 0000000000..c181bb2262
--- /dev/null
+++ b/hw/acpi/acpi_interface.c
@@ -0,0 +1,15 @@
+#include "hw/acpi/acpi_dev_interface.h"
+#include "qemu/module.h"
+
+static void register_types(void)
+{
+ static const TypeInfo acpi_dev_if_info = {
+ .name = TYPE_ACPI_DEVICE_IF,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(AcpiDeviceIfClass),
+ };
+
+ type_register_static(&acpi_dev_if_info);
+}
+
+type_init(register_types)
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 407ae8900c..e7d6c77b34 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -34,6 +34,7 @@
#include "exec/address-spaces.h"
#include "hw/i386/ich9.h"
+#include "hw/mem/pc-dimm.h"
//#define DEBUG
@@ -139,6 +140,23 @@ static int ich9_pm_post_load(void *opaque, int version_id)
.offset = vmstate_offset_pointer(_state, _field, uint8_t), \
}
+static bool vmstate_test_use_memhp(void *opaque)
+{
+ ICH9LPCPMRegs *s = opaque;
+ return s->acpi_memory_hotplug.is_enabled;
+}
+
+static const VMStateDescription vmstate_memhp_state = {
+ .name = "ich9_pm/memhp",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, ICH9LPCPMRegs),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
const VMStateDescription vmstate_ich9_pm = {
.name = "ich9_pm",
.version_id = 1,
@@ -155,6 +173,13 @@ const VMStateDescription vmstate_ich9_pm = {
VMSTATE_UINT32(smi_en, ICH9LPCPMRegs),
VMSTATE_UINT32(smi_sts, ICH9LPCPMRegs),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (VMStateSubsection[]) {
+ {
+ .vmsd = &vmstate_memhp_state,
+ .needed = vmstate_test_use_memhp,
+ },
+ VMSTATE_END_OF_LIST()
}
};
@@ -223,6 +248,11 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
&pm->gpe_cpu, ICH9_CPU_HOTPLUG_IO_BASE);
pm->cpu_added_notifier.notify = ich9_cpu_added_req;
qemu_register_cpu_added_notifier(&pm->cpu_added_notifier);
+
+ if (pm->acpi_memory_hotplug.is_enabled) {
+ acpi_memory_hotplug_init(pci_address_space_io(lpc_pci), OBJECT(lpc_pci),
+ &pm->acpi_memory_hotplug);
+ }
}
static void ich9_pm_get_gpe0_blk(Object *obj, Visitor *v,
@@ -235,9 +265,25 @@ static void ich9_pm_get_gpe0_blk(Object *obj, Visitor *v,
visit_type_uint32(v, &value, name, errp);
}
+static bool ich9_pm_get_memory_hotplug_support(Object *obj, Error **errp)
+{
+ ICH9LPCState *s = ICH9_LPC_DEVICE(obj);
+
+ return s->pm.acpi_memory_hotplug.is_enabled;
+}
+
+static void ich9_pm_set_memory_hotplug_support(Object *obj, bool value,
+ Error **errp)
+{
+ ICH9LPCState *s = ICH9_LPC_DEVICE(obj);
+
+ s->pm.acpi_memory_hotplug.is_enabled = value;
+}
+
void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp)
{
static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN;
+ pm->acpi_memory_hotplug.is_enabled = true;
object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE,
&pm->pm_io_base, errp);
@@ -246,4 +292,27 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp)
NULL, NULL, pm, NULL);
object_property_add_uint32_ptr(obj, ACPI_PM_PROP_GPE0_BLK_LEN,
&gpe0_len, errp);
+ object_property_add_bool(obj, "memory-hotplug-support",
+ ich9_pm_get_memory_hotplug_support,
+ ich9_pm_set_memory_hotplug_support,
+ NULL);
+}
+
+void ich9_pm_device_plug_cb(ICH9LPCPMRegs *pm, DeviceState *dev, Error **errp)
+{
+ if (pm->acpi_memory_hotplug.is_enabled &&
+ object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+ acpi_memory_plug_cb(&pm->acpi_regs, pm->irq, &pm->acpi_memory_hotplug,
+ dev, errp);
+ } else {
+ error_setg(errp, "acpi: device plug request for not supported device"
+ " type: %s", object_get_typename(OBJECT(dev)));
+ }
+}
+
+void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list)
+{
+ ICH9LPCState *s = ICH9_LPC_DEVICE(adev);
+
+ acpi_memory_ospm_status(&s->pm.acpi_memory_hotplug, list);
}
diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c
new file mode 100644
index 0000000000..de4ddc204f
--- /dev/null
+++ b/hw/acpi/memory_hotplug.c
@@ -0,0 +1,245 @@
+#include "hw/acpi/memory_hotplug.h"
+#include "hw/acpi/pc-hotplug.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/boards.h"
+#include "trace.h"
+#include "qapi-visit.h"
+#include "monitor/monitor.h"
+#include "qapi/dealloc-visitor.h"
+#include "qapi/qmp-output-visitor.h"
+
+static ACPIOSTInfo *acpi_memory_device_status(int slot, MemStatus *mdev)
+{
+ ACPIOSTInfo *info = g_new0(ACPIOSTInfo, 1);
+
+ info->slot_type = ACPI_SLOT_TYPE_DIMM;
+ info->slot = g_strdup_printf("%d", slot);
+ info->source = mdev->ost_event;
+ info->status = mdev->ost_status;
+ if (mdev->dimm) {
+ DeviceState *dev = DEVICE(mdev->dimm);
+ if (dev->id) {
+ info->device = g_strdup(dev->id);
+ info->has_device = true;
+ }
+ }
+ return info;
+}
+
+void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list)
+{
+ int i;
+
+ for (i = 0; i < mem_st->dev_count; i++) {
+ ACPIOSTInfoList *elem = g_new0(ACPIOSTInfoList, 1);
+ elem->value = acpi_memory_device_status(i, &mem_st->devs[i]);
+ elem->next = NULL;
+ **list = elem;
+ *list = &elem->next;
+ }
+}
+
+static void acpi_memory_ost_mon_event(const MemHotplugState *mem_st)
+{
+ Visitor *v;
+ QObject *out_info;
+ QapiDeallocVisitor *md;
+ QmpOutputVisitor *mo = qmp_output_visitor_new();
+ MemStatus *mdev = &mem_st->devs[mem_st->selector];
+ ACPIOSTInfo *info = acpi_memory_device_status(mem_st->selector, mdev);
+
+ v = qmp_output_get_visitor(mo);
+ visit_type_ACPIOSTInfo(v, &info, "unused", NULL);
+
+ out_info = qmp_output_get_qobject(mo);
+ monitor_protocol_event(QEVENT_ACPI_OST, out_info);
+ qobject_decref(out_info);
+
+ qmp_output_visitor_cleanup(mo);
+ md = qapi_dealloc_visitor_new();
+ v = qapi_dealloc_get_visitor(md);
+ visit_type_ACPIOSTInfo(v, &info, "unused", NULL);
+ qapi_dealloc_visitor_cleanup(md);
+}
+
+static uint64_t acpi_memory_hotplug_read(void *opaque, hwaddr addr,
+ unsigned int size)
+{
+ uint32_t val = 0;
+ MemHotplugState *mem_st = opaque;
+ MemStatus *mdev;
+ Object *o;
+
+ if (mem_st->selector >= mem_st->dev_count) {
+ trace_mhp_acpi_invalid_slot_selected(mem_st->selector);
+ return 0;
+ }
+
+ mdev = &mem_st->devs[mem_st->selector];
+ o = OBJECT(mdev->dimm);
+ switch (addr) {
+ case 0x0: /* Lo part of phys address where DIMM is mapped */
+ val = o ? object_property_get_int(o, PC_DIMM_ADDR_PROP, NULL) : 0;
+ trace_mhp_acpi_read_addr_lo(mem_st->selector, val);
+ break;
+ case 0x4: /* Hi part of phys address where DIMM is mapped */
+ val = o ? object_property_get_int(o, PC_DIMM_ADDR_PROP, NULL) >> 32 : 0;
+ trace_mhp_acpi_read_addr_hi(mem_st->selector, val);
+ break;
+ case 0x8: /* Lo part of DIMM size */
+ val = o ? object_property_get_int(o, PC_DIMM_SIZE_PROP, NULL) : 0;
+ trace_mhp_acpi_read_size_lo(mem_st->selector, val);
+ break;
+ case 0xc: /* Hi part of DIMM size */
+ val = o ? object_property_get_int(o, PC_DIMM_SIZE_PROP, NULL) >> 32 : 0;
+ trace_mhp_acpi_read_size_hi(mem_st->selector, val);
+ break;
+ case 0x10: /* node proximity for _PXM method */
+ val = o ? object_property_get_int(o, PC_DIMM_NODE_PROP, NULL) : 0;
+ trace_mhp_acpi_read_pxm(mem_st->selector, val);
+ break;
+ case 0x14: /* pack and return is_* fields */
+ val |= mdev->is_enabled ? 1 : 0;
+ val |= mdev->is_inserting ? 2 : 0;
+ trace_mhp_acpi_read_flags(mem_st->selector, val);
+ break;
+ default:
+ val = ~0;
+ break;
+ }
+ return val;
+}
+
+static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data,
+ unsigned int size)
+{
+ MemHotplugState *mem_st = opaque;
+ MemStatus *mdev;
+
+ if (!mem_st->dev_count) {
+ return;
+ }
+
+ if (addr) {
+ if (mem_st->selector >= mem_st->dev_count) {
+ trace_mhp_acpi_invalid_slot_selected(mem_st->selector);
+ return;
+ }
+ }
+
+ switch (addr) {
+ case 0x0: /* DIMM slot selector */
+ mem_st->selector = data;
+ trace_mhp_acpi_write_slot(mem_st->selector);
+ break;
+ case 0x4: /* _OST event */
+ mdev = &mem_st->devs[mem_st->selector];
+ if (data == 1) {
+ /* TODO: handle device insert OST event */
+ } else if (data == 3) {
+ /* TODO: handle device remove OST event */
+ }
+ mdev->ost_event = data;
+ trace_mhp_acpi_write_ost_ev(mem_st->selector, mdev->ost_event);
+ break;
+ case 0x8: /* _OST status */
+ mdev = &mem_st->devs[mem_st->selector];
+ mdev->ost_status = data;
+ trace_mhp_acpi_write_ost_status(mem_st->selector, mdev->ost_status);
+ /* TODO: implement memory removal on guest signal */
+ acpi_memory_ost_mon_event(mem_st);
+ break;
+ case 0x14:
+ mdev = &mem_st->devs[mem_st->selector];
+ if (data & 2) { /* clear insert event */
+ mdev->is_inserting = false;
+ trace_mhp_acpi_clear_insert_evt(mem_st->selector);
+ }
+ break;
+ }
+
+}
+static const MemoryRegionOps acpi_memory_hotplug_ops = {
+ .read = acpi_memory_hotplug_read,
+ .write = acpi_memory_hotplug_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 4,
+ },
+};
+
+/*
+ * Set up the ACPI memory hotplug register block.
+ *
+ * @as:    memory region the register block is added to, at offset
+ *         ACPI_MEMORY_HOTPLUG_BASE
+ * @owner: object passed as owner of the new I/O region
+ * @state: hotplug state to initialise; it is also the opaque pointer
+ *         handed to the read/write callbacks
+ *
+ * One MemStatus entry is allocated per machine RAM slot.  When the
+ * machine has no RAM slots nothing is allocated and no I/O region is
+ * registered.
+ */
+void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner,
+                              MemHotplugState *state)
+{
+    MachineState *machine = MACHINE(qdev_get_machine());
+
+    state->dev_count = machine->ram_slots;
+    if (!state->dev_count) {
+        return;
+    }
+
+    state->devs = g_malloc0(sizeof(*state->devs) * state->dev_count);
+    /* Region name was misspelled "apci-mem-hotplug". */
+    memory_region_init_io(&state->io, owner, &acpi_memory_hotplug_ops, state,
+                          "acpi-mem-hotplug", ACPI_MEMORY_HOTPLUG_IO_LEN);
+    memory_region_add_subregion(as, ACPI_MEMORY_HOTPLUG_BASE, &state->io);
+}
+
+void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st,
+ DeviceState *dev, Error **errp)
+{
+ MemStatus *mdev;
+ Error *local_err = NULL;
+ int slot = object_property_get_int(OBJECT(dev), "slot", &local_err);
+
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ if (slot >= mem_st->dev_count) {
+ char *dev_path = object_get_canonical_path(OBJECT(dev));
+ error_setg(errp, "acpi_memory_plug_cb: "
+ "device [%s] returned invalid memory slot[%d]",
+ dev_path, slot);
+ g_free(dev_path);
+ return;
+ }
+
+ mdev = &mem_st->devs[slot];
+ mdev->dimm = dev;
+ mdev->is_enabled = true;
+ mdev->is_inserting = true;
+
+ /* do ACPI magic */
+ ar->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS;
+ acpi_update_sci(ar, irq);
+ return;
+}
+
+static const VMStateDescription vmstate_memhp_sts = {
+ .name = "memory hotplug device state",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(is_enabled, MemStatus),
+ VMSTATE_BOOL(is_inserting, MemStatus),
+ VMSTATE_UINT32(ost_event, MemStatus),
+ VMSTATE_UINT32(ost_status, MemStatus),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+const VMStateDescription vmstate_memory_hotplug = {
+ .name = "memory hotplug state",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(selector, MemHotplugState),
+ VMSTATE_STRUCT_VARRAY_POINTER_UINT32(devs, MemHotplugState, dev_count,
+ vmstate_memhp_sts, MemStatus),
+ VMSTATE_END_OF_LIST()
+ }
+};
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 252bbf2c77..b72b34e5c9 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -33,6 +33,9 @@
#include "hw/acpi/pcihp.h"
#include "hw/acpi/cpu_hotplug.h"
#include "hw/hotplug.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/acpi/memory_hotplug.h"
+#include "hw/acpi/acpi_dev_interface.h"
//#define DEBUG
@@ -81,6 +84,8 @@ typedef struct PIIX4PMState {
AcpiCpuHotplug gpe_cpu;
Notifier cpu_added_notifier;
+
+ MemHotplugState acpi_memory_hotplug;
} PIIX4PMState;
#define TYPE_PIIX4_PM "PIIX4_PM"
@@ -244,6 +249,23 @@ static bool vmstate_test_no_use_acpi_pci_hotplug(void *opaque, int version_id)
return !s->use_acpi_pci_hotplug;
}
+/*
+ * Predicate used as the .needed callback of the memory hotplug subsection.
+ * @opaque is the PIIX4PMState being migrated; returns whether its
+ * acpi_memory_hotplug.is_enabled flag is set.
+ */
+static bool vmstate_test_use_memhp(void *opaque)
+{
+ PIIX4PMState *s = opaque;
+ return s->acpi_memory_hotplug.is_enabled;
+}
+
+/*
+ * Subsection "piix4_pm/memhp": carries the acpi_memory_hotplug member of
+ * PIIX4PMState via VMSTATE_MEMORY_HOTPLUG.
+ */
+static const VMStateDescription vmstate_memhp_state = {
+ .name = "piix4_pm/memhp",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, PIIX4PMState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
/* qemu-kvm 1.2 uses version 3 but advertised as 2
* To support incoming qemu-kvm 1.2 migration, change version_id
* and minimum_version_id to 2 below (which breaks migration from
@@ -275,6 +297,13 @@ static const VMStateDescription vmstate_acpi = {
VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, PIIX4PMState,
vmstate_test_use_acpi_pci_hotplug),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (VMStateSubsection[]) {
+ {
+ .vmsd = &vmstate_memhp_state,
+ .needed = vmstate_test_use_memhp,
+ },
+ VMSTATE_END_OF_LIST()
}
};
@@ -308,19 +337,35 @@ static void piix4_pm_powerdown_req(Notifier *n, void *opaque)
acpi_pm1_evt_power_down(&s->ar);
}
-static void piix4_pci_device_plug_cb(HotplugHandler *hotplug_dev,
- DeviceState *dev, Error **errp)
+static void piix4_device_plug_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
{
PIIX4PMState *s = PIIX4_PM(hotplug_dev);
- acpi_pcihp_device_plug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev, errp);
+
+ if (s->acpi_memory_hotplug.is_enabled &&
+ object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+ acpi_memory_plug_cb(&s->ar, s->irq, &s->acpi_memory_hotplug, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+ acpi_pcihp_device_plug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev,
+ errp);
+ } else {
+ error_setg(errp, "acpi: device plug request for not supported device"
+ " type: %s", object_get_typename(OBJECT(dev)));
+ }
}
-static void piix4_pci_device_unplug_cb(HotplugHandler *hotplug_dev,
- DeviceState *dev, Error **errp)
+static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
{
PIIX4PMState *s = PIIX4_PM(hotplug_dev);
- acpi_pcihp_device_unplug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev,
- errp);
+
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+ acpi_pcihp_device_unplug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev,
+ errp);
+ } else {
+ error_setg(errp, "acpi: device unplug request for not supported device"
+ " type: %s", object_get_typename(OBJECT(dev)));
+ }
}
static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque)
@@ -439,13 +484,17 @@ Object *piix4_pm_find(void)
I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
qemu_irq sci_irq, qemu_irq smi_irq,
- int kvm_enabled, FWCfgState *fw_cfg)
+ int kvm_enabled, FWCfgState *fw_cfg,
+ DeviceState **piix4_pm)
{
DeviceState *dev;
PIIX4PMState *s;
dev = DEVICE(pci_create(bus, devfn, TYPE_PIIX4_PM));
qdev_prop_set_uint32(dev, "smb_io_base", smb_io_base);
+ if (piix4_pm) {
+ *piix4_pm = dev;
+ }
s = PIIX4_PM(dev);
s->irq = sci_irq;
@@ -518,6 +567,17 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
PIIX4_CPU_HOTPLUG_IO_BASE);
s->cpu_added_notifier.notify = piix4_cpu_added_req;
qemu_register_cpu_added_notifier(&s->cpu_added_notifier);
+
+ if (s->acpi_memory_hotplug.is_enabled) {
+ acpi_memory_hotplug_init(parent, OBJECT(s), &s->acpi_memory_hotplug);
+ }
+}
+
+/*
+ * ospm_status implementation installed on the PIIX4_PM class: forwards
+ * @list to acpi_memory_ospm_status() together with this device's memory
+ * hotplug state.
+ */
+static void piix4_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list)
+{
+ PIIX4PMState *s = PIIX4_PM(adev);
+
+ acpi_memory_ospm_status(&s->acpi_memory_hotplug, list);
}
static Property piix4_pm_properties[] = {
@@ -527,6 +587,8 @@ static Property piix4_pm_properties[] = {
DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2),
DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState,
use_acpi_pci_hotplug, true),
+ DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState,
+ acpi_memory_hotplug.is_enabled, true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -535,6 +597,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data)
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
+ AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(klass);
k->init = piix4_pm_initfn;
k->config_write = pm_write_config;
@@ -551,8 +614,9 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data)
*/
dc->cannot_instantiate_with_device_add_yet = true;
dc->hotpluggable = false;
- hc->plug = piix4_pci_device_plug_cb;
- hc->unplug = piix4_pci_device_unplug_cb;
+ hc->plug = piix4_device_plug_cb;
+ hc->unplug = piix4_device_unplug_cb;
+ adevc->ospm_status = piix4_ospm_status;
}
static const TypeInfo piix4_pm_info = {
@@ -562,6 +626,7 @@ static const TypeInfo piix4_pm_info = {
.class_init = piix4_pm_class_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
+ { TYPE_ACPI_DEVICE_IF },
{ }
}
};
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index e65a5aa3a8..b9cd4fc814 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -34,6 +34,7 @@
#include "qapi/qmp/qjson.h"
#include "monitor/monitor.h"
#include "hw/hotplug.h"
+#include "hw/boards.h"
int qdev_hotplug = 0;
static bool qdev_hot_added = false;
@@ -567,32 +568,35 @@ static void bus_set_realized(Object *obj, bool value, Error **errp)
{
BusState *bus = BUS(obj);
BusClass *bc = BUS_GET_CLASS(bus);
+ BusChild *kid;
Error *local_err = NULL;
if (value && !bus->realized) {
if (bc->realize) {
bc->realize(bus, &local_err);
+ }
+ /* TODO: recursive realization */
+ } else if (!value && bus->realized) {
+ QTAILQ_FOREACH(kid, &bus->children, sibling) {
+ DeviceState *dev = kid->child;
+ object_property_set_bool(OBJECT(dev), false, "realized",
+ &local_err);
if (local_err != NULL) {
- goto error;
+ break;
}
-
}
- } else if (!value && bus->realized) {
- if (bc->unrealize) {
+ if (bc->unrealize && local_err == NULL) {
bc->unrealize(bus, &local_err);
-
- if (local_err != NULL) {
- goto error;
- }
}
}
- bus->realized = value;
- return;
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
+ }
-error:
- error_propagate(errp, local_err);
+ bus->realized = value;
}
void qbus_create_inplace(void *bus, size_t size, const char *typename,
@@ -813,6 +817,18 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
local_err == NULL) {
hotplug_handler_plug(dev->parent_bus->hotplug_handler,
dev, &local_err);
+ } else if (local_err == NULL &&
+ object_dynamic_cast(qdev_get_machine(), TYPE_MACHINE)) {
+ HotplugHandler *hotplug_ctrl;
+ MachineState *machine = MACHINE(qdev_get_machine());
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+ if (mc->get_hotplug_handler) {
+ hotplug_ctrl = mc->get_hotplug_handler(machine, dev);
+ if (hotplug_ctrl) {
+ hotplug_handler_plug(hotplug_ctrl, dev, &local_err);
+ }
+ }
}
if (qdev_get_vmsd(dev) && local_err == NULL) {
@@ -865,6 +881,20 @@ static bool device_get_hotpluggable(Object *obj, Error **errp)
dev->parent_bus->allow_hotplug);
}
+/*
+ * Getter for the "hotplugged" bool property: returns dev->hotplugged.
+ * @err is never written.
+ */
+static bool device_get_hotplugged(Object *obj, Error **err)
+{
+ DeviceState *dev = DEVICE(obj);
+
+ return dev->hotplugged;
+}
+
+/*
+ * Setter for the "hotplugged" bool property: stores @value in
+ * dev->hotplugged without any validation.  @err is never written.
+ */
+static void device_set_hotplugged(Object *obj, bool value, Error **err)
+{
+ DeviceState *dev = DEVICE(obj);
+
+ dev->hotplugged = value;
+}
+
static void device_initfn(Object *obj)
{
DeviceState *dev = DEVICE(obj);
@@ -883,6 +913,9 @@ static void device_initfn(Object *obj)
device_get_realized, device_set_realized, NULL);
object_property_add_bool(obj, "hotpluggable",
device_get_hotpluggable, NULL, NULL);
+ object_property_add_bool(obj, "hotplugged",
+ device_get_hotplugged, device_set_hotplugged,
+ &error_abort);
class = object_get_class(OBJECT(dev));
do {
diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index f66c349508..48014abf0a 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -9,7 +9,8 @@ obj-y += acpi-build.o
obj-y += bios-linker-loader.o
hw/i386/acpi-build.o: hw/i386/acpi-build.c hw/i386/acpi-dsdt.hex \
hw/i386/ssdt-proc.hex hw/i386/ssdt-pcihp.hex hw/i386/ssdt-misc.hex \
- hw/i386/acpi-dsdt.hex hw/i386/q35-acpi-dsdt.hex
+ hw/i386/acpi-dsdt.hex hw/i386/q35-acpi-dsdt.hex \
+ hw/i386/q35-acpi-dsdt.hex hw/i386/ssdt-mem.hex
iasl-option=$(shell if test -z "`$(1) $(2) 2>&1 > /dev/null`" \
; then echo "$(2)"; else echo "$(3)"; fi ;)
@@ -17,7 +18,7 @@ iasl-option=$(shell if test -z "`$(1) $(2) 2>&1 > /dev/null`" \
ifdef IASL
#IASL Present. Generate hex files from .dsl
hw/i386/%.hex: $(SRC_PATH)/hw/i386/%.dsl $(SRC_PATH)/scripts/acpi_extract_preprocess.py $(SRC_PATH)/scripts/acpi_extract.py
- $(call quiet-command, cpp -P $(QEMU_DGFLAGS) $(QEMU_INCLUDES) $< -o $*.dsl.i.orig, " CPP $(TARGET_DIR)$*.dsl.i.orig")
+ $(call quiet-command, $(CPP) -x c -P $(QEMU_DGFLAGS) $(QEMU_INCLUDES) $< -o $*.dsl.i.orig, " CPP $(TARGET_DIR)$*.dsl.i.orig")
$(call quiet-command, $(PYTHON) $(SRC_PATH)/scripts/acpi_extract_preprocess.py $*.dsl.i.orig > $*.dsl.i, " ACPI_PREPROCESS $(TARGET_DIR)$*.dsl.i")
$(call quiet-command, $(IASL) $(call iasl-option,$(IASL),-Pn,) -vs -l -tc -p $* $*.dsl.i $(if $(V), , > /dev/null) 2>&1 ," IASL $(TARGET_DIR)$*.dsl.i")
$(call quiet-command, $(PYTHON) $(SRC_PATH)/scripts/acpi_extract.py $*.lst > $*.off, " ACPI_EXTRACT $(TARGET_DIR)$*.off")
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 1e0aa09bc8..ebc5f034e3 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -37,6 +37,7 @@
#include "bios-linker-loader.h"
#include "hw/loader.h"
#include "hw/isa/isa.h"
+#include "hw/acpi/memory_hotplug.h"
/* Supported chipsets: */
#include "hw/acpi/piix4.h"
@@ -667,6 +668,14 @@ static inline char acpi_get_hex(uint32_t val)
#define ACPI_PCIQXL_SIZEOF (*ssdt_pciqxl_end - *ssdt_pciqxl_start)
#define ACPI_PCIQXL_AML (ssdp_pcihp_aml + *ssdt_pciqxl_start)
+#include "hw/i386/ssdt-mem.hex"
+
+/* 0x5B 0x82 DeviceOp PkgLength NameString DimmID */
+#define ACPI_MEM_OFFSET_HEX (*ssdt_mem_name - *ssdt_mem_start + 2)
+#define ACPI_MEM_OFFSET_ID (*ssdt_mem_id - *ssdt_mem_start + 7)
+#define ACPI_MEM_SIZEOF (*ssdt_mem_end - *ssdt_mem_start)
+#define ACPI_MEM_AML (ssdm_mem_aml + *ssdt_mem_start)
+
#define ACPI_SSDT_SIGNATURE 0x54445353 /* SSDT */
#define ACPI_SSDT_HEADER_LENGTH 36
@@ -1003,6 +1012,8 @@ build_ssdt(GArray *table_data, GArray *linker,
AcpiCpuInfo *cpu, AcpiPmInfo *pm, AcpiMiscInfo *misc,
PcPciInfo *pci, PcGuestInfo *guest_info)
{
+ MachineState *machine = MACHINE(qdev_get_machine());
+ uint32_t nr_mem = machine->ram_slots;
unsigned acpi_cpus = guest_info->apic_id_limit;
int ssdt_start = table_data->len;
uint8_t *ssdt_ptr;
@@ -1031,6 +1042,9 @@ build_ssdt(GArray *table_data, GArray *linker,
ACPI_BUILD_SET_LE(ssdt_ptr, sizeof(ssdp_misc_aml),
ssdt_isa_pest[0], 16, misc->pvpanic_port);
+ ACPI_BUILD_SET_LE(ssdt_ptr, sizeof(ssdp_misc_aml),
+ ssdt_mctrl_nr_slots[0], 32, nr_mem);
+
{
GArray *sb_scope = build_alloc_array();
uint8_t op = 0x10; /* ScopeOp */
@@ -1084,6 +1098,27 @@ build_ssdt(GArray *table_data, GArray *linker,
build_free_array(package);
}
+ if (nr_mem) {
+ assert(nr_mem <= ACPI_MAX_RAM_SLOTS);
+ /* build memory devices */
+ for (i = 0; i < nr_mem; i++) {
+ char id[3];
+ uint8_t *mem = acpi_data_push(sb_scope, ACPI_MEM_SIZEOF);
+
+ snprintf(id, sizeof(id), "%02X", i);
+ memcpy(mem, ACPI_MEM_AML, ACPI_MEM_SIZEOF);
+ memcpy(mem + ACPI_MEM_OFFSET_HEX, id, 2);
+ memcpy(mem + ACPI_MEM_OFFSET_ID, id, 2);
+ }
+
+ /* build Method(MEMORY_SLOT_NOTIFY_METHOD, 2) {
+ * If (LEqual(Arg0, 0x00)) {Notify(MP00, Arg1)} ...
+ */
+ build_append_notify_method(sb_scope,
+ stringify(MEMORY_SLOT_NOTIFY_METHOD),
+ "MP%0.02X", nr_mem);
+ }
+
{
AcpiBuildPciBusHotplugState hotplug_state;
Object *pci_host;
@@ -1132,15 +1167,22 @@ build_hpet(GArray *table_data, GArray *linker)
(void *)hpet, "HPET", sizeof(*hpet), 1);
}
+/*
+ * Bit flags accepted by acpi_build_srat_memory(), which stores them
+ * (converted to little endian) in the flags field of the SRAT memory
+ * affinity entry.  Values may be OR-ed together.
+ */
+typedef enum {
+ MEM_AFFINITY_NOFLAGS = 0,
+ MEM_AFFINITY_ENABLED = (1 << 0),
+ MEM_AFFINITY_HOTPLUGGABLE = (1 << 1),
+ MEM_AFFINITY_NON_VOLATILE = (1 << 2),
+} MemoryAffinityFlags;
+
static void
-acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem,
- uint64_t base, uint64_t len, int node, int enabled)
+acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
+ uint64_t len, int node, MemoryAffinityFlags flags)
{
numamem->type = ACPI_SRAT_MEMORY;
numamem->length = sizeof(*numamem);
memset(numamem->proximity, 0, 4);
numamem->proximity[0] = node;
- numamem->flags = cpu_to_le32(!!enabled);
+ numamem->flags = cpu_to_le32(flags);
numamem->base_addr = cpu_to_le64(base);
numamem->range_length = cpu_to_le64(len);
}
@@ -1157,6 +1199,10 @@ build_srat(GArray *table_data, GArray *linker,
uint64_t curnode;
int srat_start, numa_start, slots;
uint64_t mem_len, mem_base, next_base;
+ PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
+ ram_addr_t hotplugabble_address_space_size =
+ object_property_get_int(OBJECT(pcms), PC_MACHINE_MEMHP_REGION_SIZE,
+ NULL);
srat_start = table_data->len;
@@ -1188,7 +1234,7 @@ build_srat(GArray *table_data, GArray *linker,
numa_start = table_data->len;
numamem = acpi_data_push(table_data, sizeof *numamem);
- acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);
+ acpi_build_srat_memory(numamem, 0, 640*1024, 0, MEM_AFFINITY_ENABLED);
next_base = 1024 * 1024;
for (i = 1; i < guest_info->numa_nodes + 1; ++i) {
mem_base = next_base;
@@ -1204,19 +1250,34 @@ build_srat(GArray *table_data, GArray *linker,
mem_len -= next_base - guest_info->ram_size_below_4g;
if (mem_len > 0) {
numamem = acpi_data_push(table_data, sizeof *numamem);
- acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
+ acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1,
+ MEM_AFFINITY_ENABLED);
}
mem_base = 1ULL << 32;
mem_len = next_base - guest_info->ram_size_below_4g;
next_base += (1ULL << 32) - guest_info->ram_size_below_4g;
}
numamem = acpi_data_push(table_data, sizeof *numamem);
- acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, 1);
+ acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1,
+ MEM_AFFINITY_ENABLED);
}
slots = (table_data->len - numa_start) / sizeof *numamem;
for (; slots < guest_info->numa_nodes + 2; slots++) {
numamem = acpi_data_push(table_data, sizeof *numamem);
- acpi_build_srat_memory(numamem, 0, 0, 0, 0);
+ acpi_build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS);
+ }
+
+ /*
+ * Entry is required for Windows to enable memory hotplug in OS.
+ * Memory devices may override proximity set by this entry,
+ * providing _PXM method if necessary.
+ */
+ if (hotplugabble_address_space_size) {
+ numamem = acpi_data_push(table_data, sizeof *numamem);
+ acpi_build_srat_memory(numamem, pcms->hotplug_memory_base,
+ hotplugabble_address_space_size, 0,
+ MEM_AFFINITY_HOTPLUGGABLE |
+ MEM_AFFINITY_ENABLED);
}
build_header(linker, table_data,
diff --git a/hw/i386/acpi-dsdt.dsl b/hw/i386/acpi-dsdt.dsl
index 0a1e252d21..3cc0ea0f9a 100644
--- a/hw/i386/acpi-dsdt.dsl
+++ b/hw/i386/acpi-dsdt.dsl
@@ -306,7 +306,7 @@ DefinitionBlock (
}
}
-#include "hw/acpi/cpu_hotplug_defs.h"
+#include "hw/acpi/pc-hotplug.h"
#define CPU_STATUS_BASE PIIX4_CPU_HOTPLUG_IO_BASE
#include "acpi-dsdt-cpu-hotplug.dsl"
@@ -314,6 +314,7 @@ DefinitionBlock (
/****************************************************************
* General purpose events
****************************************************************/
+ External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)
Scope(\_GPE) {
Name(_HID, "ACPI0006")
@@ -330,7 +331,9 @@ DefinitionBlock (
// CPU hotplug event
\_SB.PRSC()
}
- Method(_L03) {
+ Method(_E03) {
+ // Memory hotplug event
+ \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
}
Method(_L04) {
}
diff --git a/hw/i386/acpi-dsdt.hex.generated b/hw/i386/acpi-dsdt.hex.generated
index e61572a5dd..ee490e89c3 100644
--- a/hw/i386/acpi-dsdt.hex.generated
+++ b/hw/i386/acpi-dsdt.hex.generated
@@ -3,12 +3,12 @@ static unsigned char AcpiDsdtAmlCode[] = {
0x53,
0x44,
0x54,
-0x80,
+0x93,
0x11,
0x0,
0x0,
0x1,
-0x60,
+0xf5,
0x42,
0x58,
0x50,
@@ -4285,8 +4285,8 @@ static unsigned char AcpiDsdtAmlCode[] = {
0xa,
0xb,
0x10,
-0x42,
-0xc,
+0x45,
+0xd,
0x5f,
0x47,
0x50,
@@ -4389,12 +4389,31 @@ static unsigned char AcpiDsdtAmlCode[] = {
0x53,
0x43,
0x14,
-0x6,
+0x19,
0x5f,
-0x4c,
+0x45,
0x30,
0x33,
0x0,
+0x5c,
+0x2f,
+0x4,
+0x5f,
+0x53,
+0x42,
+0x5f,
+0x50,
+0x43,
+0x49,
+0x30,
+0x4d,
+0x48,
+0x50,
+0x44,
+0x4d,
+0x53,
+0x43,
+0x4e,
0x14,
0x6,
0x5f,
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 3e0ecf140d..67eb45089e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -58,6 +58,9 @@
#include "hw/boards.h"
#include "hw/pci/pci_host.h"
#include "acpi-build.h"
+#include "hw/mem/pc-dimm.h"
+#include "trace.h"
+#include "qapi/visitor.h"
/* debug PC/ISA interrupts */
//#define DEBUG_IRQ
@@ -701,14 +704,14 @@ static FWCfgState *bochs_bios_init(void)
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
assert(apic_id < apic_id_limit);
for (j = 0; j < nb_numa_nodes; j++) {
- if (test_bit(i, node_cpumask[j])) {
+ if (test_bit(i, numa_info[j].node_cpu)) {
numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
break;
}
}
}
for (i = 0; i < nb_numa_nodes; i++) {
- numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]);
+ numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem);
}
fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
(1 + apic_id_limit + nb_numa_nodes) *
@@ -1119,8 +1122,12 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
guest_info->apic_xrupt_override = kvm_allows_irq0_override();
guest_info->numa_nodes = nb_numa_nodes;
- guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes *
+ guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
sizeof *guest_info->node_mem);
+ for (i = 0; i < nb_numa_nodes; i++) {
+ guest_info->node_mem[i] = numa_info[i].node_mem;
+ }
+
guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
sizeof *guest_info->node_cpu);
@@ -1128,7 +1135,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
assert(apic_id < guest_info->apic_id_limit);
for (j = 0; j < nb_numa_nodes; j++) {
- if (test_bit(i, node_cpumask[j])) {
+ if (test_bit(i, numa_info[j].node_cpu)) {
guest_info->node_cpu[apic_id] = j;
break;
}
@@ -1183,10 +1190,8 @@ void pc_acpi_init(const char *default_dsdt)
}
}
-FWCfgState *pc_memory_init(MemoryRegion *system_memory,
- const char *kernel_filename,
- const char *kernel_cmdline,
- const char *initrd_filename,
+FWCfgState *pc_memory_init(MachineState *machine,
+ MemoryRegion *system_memory,
ram_addr_t below_4g_mem_size,
ram_addr_t above_4g_mem_size,
MemoryRegion *rom_memory,
@@ -1197,17 +1202,19 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
MemoryRegion *ram, *option_rom_mr;
MemoryRegion *ram_below_4g, *ram_above_4g;
FWCfgState *fw_cfg;
+ PCMachineState *pcms = PC_MACHINE(machine);
- linux_boot = (kernel_filename != NULL);
+ assert(machine->ram_size == below_4g_mem_size + above_4g_mem_size);
+
+ linux_boot = (machine->kernel_filename != NULL);
/* Allocate RAM. We allocate it as a single memory region and use
* aliases to address portions of it, mostly for backwards compatibility
* with older qemus that used qemu_ram_alloc().
*/
ram = g_malloc(sizeof(*ram));
- memory_region_init_ram(ram, NULL, "pc.ram",
- below_4g_mem_size + above_4g_mem_size);
- vmstate_register_ram_global(ram);
+ memory_region_allocate_system_memory(ram, NULL, "pc.ram",
+ machine->ram_size);
*ram_memory = ram;
ram_below_4g = g_malloc(sizeof(*ram_below_4g));
memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
@@ -1223,6 +1230,43 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
e820_add_entry(0x100000000ULL, above_4g_mem_size, E820_RAM);
}
+ if (!guest_info->has_reserved_memory &&
+ (machine->ram_slots ||
+ (machine->maxram_size > machine->ram_size))) {
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+ error_report("\"-memory 'slots|maxmem'\" is not supported by: %s",
+ mc->name);
+ exit(EXIT_FAILURE);
+ }
+
+ /* initialize hotplug memory address space */
+ if (guest_info->has_reserved_memory &&
+ (machine->ram_size < machine->maxram_size)) {
+ ram_addr_t hotplug_mem_size =
+ machine->maxram_size - machine->ram_size;
+
+ if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
+ error_report("unsupported amount of memory slots: %"PRIu64,
+ machine->ram_slots);
+ exit(EXIT_FAILURE);
+ }
+
+ pcms->hotplug_memory_base =
+ ROUND_UP(0x100000000ULL + above_4g_mem_size, 1ULL << 30);
+
+ if ((pcms->hotplug_memory_base + hotplug_mem_size) <
+ hotplug_mem_size) {
+ error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT,
+ machine->maxram_size);
+ exit(EXIT_FAILURE);
+ }
+
+ memory_region_init(&pcms->hotplug_memory, OBJECT(pcms),
+ "hotplug-memory", hotplug_mem_size);
+ memory_region_add_subregion(system_memory, pcms->hotplug_memory_base,
+ &pcms->hotplug_memory);
+ }
/* Initialize PC system firmware */
pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw);
@@ -1238,8 +1282,15 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
fw_cfg = bochs_bios_init();
rom_set_fw(fw_cfg);
+ if (guest_info->has_reserved_memory && pcms->hotplug_memory_base) {
+ uint64_t *val = g_malloc(sizeof(*val));
+ *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory_base, 0x1ULL << 30));
+ fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
+ }
+
if (linux_boot) {
- load_linux(fw_cfg, kernel_filename, initrd_filename, kernel_cmdline, below_4g_mem_size);
+ load_linux(fw_cfg, machine->kernel_filename, machine->initrd_filename,
+ machine->kernel_cmdline, below_4g_mem_size);
}
for (i = 0; i < nb_option_roms; i++) {
@@ -1455,3 +1506,178 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i);
}
}
+
+/*
+ * class_init for the per-board types created by qemu_register_pc_machine().
+ * @data is the QEMUMachine passed as class_data; each listed field is
+ * copied one-to-one into the MachineClass.
+ */
+static void pc_generic_machine_class_init(ObjectClass *oc, void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ QEMUMachine *qm = data;
+
+ mc->name = qm->name;
+ mc->alias = qm->alias;
+ mc->desc = qm->desc;
+ mc->init = qm->init;
+ mc->reset = qm->reset;
+ mc->hot_add_cpu = qm->hot_add_cpu;
+ mc->kvm_type = qm->kvm_type;
+ mc->block_default_type = qm->block_default_type;
+ mc->max_cpus = qm->max_cpus;
+ mc->no_serial = qm->no_serial;
+ mc->no_parallel = qm->no_parallel;
+ mc->use_virtcon = qm->use_virtcon;
+ mc->use_sclp = qm->use_sclp;
+ mc->no_floppy = qm->no_floppy;
+ mc->no_cdrom = qm->no_cdrom;
+ mc->no_sdcard = qm->no_sdcard;
+ mc->is_default = qm->is_default;
+ mc->default_machine_opts = qm->default_machine_opts;
+ mc->default_boot_order = qm->default_boot_order;
+ mc->compat_props = qm->compat_props;
+ mc->hw_version = qm->hw_version;
+}
+
+/*
+ * Register a QOM type for the PC board described by @m.
+ *
+ * The type name is m->name with TYPE_MACHINE_SUFFIX appended, its parent
+ * is TYPE_PC_MACHINE, and @m itself is handed to
+ * pc_generic_machine_class_init() as class_data, so @m must outlive class
+ * initialization.
+ */
+void qemu_register_pc_machine(QEMUMachine *m)
+{
+ char *name = g_strconcat(m->name, TYPE_MACHINE_SUFFIX, NULL);
+ TypeInfo ti = {
+ .name = name,
+ .parent = TYPE_PC_MACHINE,
+ .class_init = pc_generic_machine_class_init,
+ .class_data = (void *)m,
+ };
+
+ type_register(&ti);
+ /* NOTE(review): relies on type_register() keeping its own copy of the
+ * name string - confirm against the QOM implementation. */
+ g_free(name);
+}
+
+/*
+ * Plug handler for PC_DIMM devices; @hotplug_dev is the PC machine.
+ *
+ * Steps, each aborting to "out" on error:
+ *  1. read the DIMM's address property and let pc_dimm_get_free_addr()
+ *     pick/validate an address inside the hotplug-memory region, then
+ *     write the result back to the property;
+ *  2. do the same for the slot property via pc_dimm_get_free_slot(),
+ *     bounded by machine->ram_slots;
+ *  3. require an ACPI device to be linked to the machine;
+ *  4. map the DIMM's memory region into the hotplug-memory container,
+ *     register it for migration and forward the plug to the ACPI device.
+ *
+ * Any error is reported through @errp.
+ */
+static void pc_dimm_plug(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ int slot;
+ HotplugHandlerClass *hhc;
+ Error *local_err = NULL;
+ PCMachineState *pcms = PC_MACHINE(hotplug_dev);
+ MachineState *machine = MACHINE(hotplug_dev);
+ PCDIMMDevice *dimm = PC_DIMM(dev);
+ PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+ MemoryRegion *mr = ddc->get_memory_region(dimm);
+ uint64_t addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP,
+ &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ /* An address of 0 is passed on as a NULL hint (presumably "no address
+ * requested, choose one" - confirm in pc_dimm_get_free_addr()). */
+ addr = pc_dimm_get_free_addr(pcms->hotplug_memory_base,
+ memory_region_size(&pcms->hotplug_memory),
+ !addr ? NULL : &addr,
+ memory_region_size(mr), &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ trace_mhp_pc_dimm_assigned_address(addr);
+
+ slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ /* PC_DIMM_UNASSIGNED_SLOT is likewise turned into a NULL hint. */
+ slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
+ machine->ram_slots, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ trace_mhp_pc_dimm_assigned_slot(slot);
+
+ /* Checked only here, i.e. after the address and slot properties have
+ * already been updated on the device. */
+ if (!pcms->acpi_dev) {
+ error_setg(&local_err,
+ "memory hotplug is not enabled: missing acpi device");
+ goto out;
+ }
+
+ /* Subregion offset is relative to the start of the hotplug container. */
+ memory_region_add_subregion(&pcms->hotplug_memory,
+ addr - pcms->hotplug_memory_base, mr);
+ vmstate_register_ram(mr, dev);
+
+ /* NOTE(review): if the ACPI plug below fails, nothing in this function
+ * removes the subregion mapped above - confirm whether a caller undoes
+ * it. */
+ hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
+ hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
+out:
+ /* Also reached on success, in which case local_err is still NULL. */
+ error_propagate(errp, local_err);
+}
+
+/*
+ * HotplugHandler plug callback of the PC machine.  Only PC_DIMM devices
+ * are handled (via pc_dimm_plug()); any other device type is ignored and
+ * @errp is left untouched.
+ */
+static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+ pc_dimm_plug(hotplug_dev, dev, errp);
+ }
+}
+
+/*
+ * MachineClass get_hotplug_handler hook for PC machines.
+ *
+ * Returns the machine itself as hotplug handler for PC_DIMM devices.  For
+ * every other device the request is delegated to the handler lookup saved
+ * in PCMachineClass (the previous MachineClass hook, stored by
+ * pc_machine_class_init()), or NULL when none was saved.
+ */
+static HotplugHandler *pc_get_hotpug_handler(MachineState *machine,
+ DeviceState *dev)
+{
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
+
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+ return HOTPLUG_HANDLER(machine);
+ }
+
+ return pcmc->get_hotplug_handler ?
+ pcmc->get_hotplug_handler(machine, dev) : NULL;
+}
+
+/*
+ * Property getter: visits the current size of the machine's
+ * hotplug_memory region as an integer.
+ */
+static void
+pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
+ const char *name, Error **errp)
+{
+ PCMachineState *pcms = PC_MACHINE(obj);
+ int64_t value = memory_region_size(&pcms->hotplug_memory);
+
+ visit_type_int(v, &value, name, errp);
+}
+
+/*
+ * Instance init for PC machines: adds the PC_MACHINE_MEMHP_REGION_SIZE
+ * "int" property.  Only a getter is supplied; the remaining callback and
+ * pointer arguments are NULL.
+ */
+static void pc_machine_initfn(Object *obj)
+{
+ object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
+ pc_machine_get_hotplug_memory_region_size,
+ NULL, NULL, NULL, NULL);
+}
+
+/*
+ * Class init for TYPE_PC_MACHINE.
+ *
+ * Saves the existing MachineClass get_hotplug_handler hook in
+ * PCMachineClass before replacing it with pc_get_hotpug_handler(), so the
+ * replacement can still delegate to it, and installs the machine's own
+ * HotplugHandler plug callback.
+ */
+static void pc_machine_class_init(ObjectClass *oc, void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
+ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+
+ /* Order matters: the old hook must be captured before it is replaced. */
+ pcmc->get_hotplug_handler = mc->get_hotplug_handler;
+ mc->get_hotplug_handler = pc_get_hotpug_handler;
+ hc->plug = pc_machine_device_plug_cb;
+}
+
+/*
+ * QOM type description for the abstract TYPE_PC_MACHINE base type, derived
+ * from TYPE_MACHINE and implementing the TYPE_HOTPLUG_HANDLER interface.
+ * Concrete boards are registered as children of this type by
+ * qemu_register_pc_machine().
+ */
+static const TypeInfo pc_machine_info = {
+ .name = TYPE_PC_MACHINE,
+ .parent = TYPE_MACHINE,
+ .abstract = true,
+ .instance_size = sizeof(PCMachineState),
+ .instance_init = pc_machine_initfn,
+ .class_size = sizeof(PCMachineClass),
+ .class_init = pc_machine_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_HOTPLUG_HANDLER },
+ { }
+ },
+};
+
+/* Registers the abstract PC machine type; hooked up through type_init(). */
+static void pc_machine_register_types(void)
+{
+ type_register_static(&pc_machine_info);
+}
+
+type_init(pc_machine_register_types)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index a48e26367d..3e7524b961 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -67,12 +67,14 @@ static bool smbios_legacy_mode;
* pages in the host.
*/
static bool gigabyte_align = true;
+static bool has_reserved_memory = true;
/* PC hardware initialisation */
static void pc_init1(MachineState *machine,
int pci_enabled,
int kvmclock_enabled)
{
+ PCMachineState *pc_machine = PC_MACHINE(machine);
MemoryRegion *system_memory = get_system_memory();
MemoryRegion *system_io = get_system_io();
int i;
@@ -143,6 +145,7 @@ static void pc_init1(MachineState *machine,
guest_info->has_pci_info = has_pci_info;
guest_info->isapc_ram_fw = !pci_enabled;
+ guest_info->has_reserved_memory = has_reserved_memory;
if (smbios_defaults) {
MachineClass *mc = MACHINE_GET_CLASS(machine);
@@ -153,11 +156,9 @@ static void pc_init1(MachineState *machine,
/* allocate ram and load rom/bios */
if (!xen_enabled()) {
- fw_cfg = pc_memory_init(system_memory,
- machine->kernel_filename, machine->kernel_cmdline,
- machine->initrd_filename,
- below_4g_mem_size, above_4g_mem_size,
- rom_memory, &ram_memory, guest_info);
+ fw_cfg = pc_memory_init(machine, system_memory,
+ below_4g_mem_size, above_4g_mem_size,
+ rom_memory, &ram_memory, guest_info);
}
gsi_state = g_malloc0(sizeof(*gsi_state));
@@ -244,14 +245,23 @@ static void pc_init1(MachineState *machine,
}
if (pci_enabled && acpi_enabled) {
+ DeviceState *piix4_pm;
I2CBus *smbus;
smi_irq = qemu_allocate_irqs(pc_acpi_smi_interrupt, first_cpu, 1);
/* TODO: Populate SPD eeprom data. */
smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100,
gsi[9], *smi_irq,
- kvm_enabled(), fw_cfg);
+ kvm_enabled(), fw_cfg, &piix4_pm);
smbus_eeprom_init(smbus, 8, NULL, 0);
+
+ object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP,
+ TYPE_HOTPLUG_HANDLER,
+ (Object **)&pc_machine->acpi_dev,
+ object_property_allow_set_link,
+ OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort);
+ object_property_set_link(OBJECT(machine), OBJECT(piix4_pm),
+ PC_MACHINE_ACPI_DEVICE_PROP, &error_abort);
}
if (pci_enabled) {
@@ -267,6 +277,7 @@ static void pc_init_pci(MachineState *machine)
static void pc_compat_2_0(MachineState *machine)
{
smbios_legacy_mode = true;
+ has_reserved_memory = false;
}
static void pc_compat_1_7(MachineState *machine)
@@ -843,25 +854,25 @@ static QEMUMachine xenfv_machine = {
static void pc_machine_init(void)
{
- qemu_register_machine(&pc_i440fx_machine_v2_1);
- qemu_register_machine(&pc_i440fx_machine_v2_0);
- qemu_register_machine(&pc_i440fx_machine_v1_7);
- qemu_register_machine(&pc_i440fx_machine_v1_6);
- qemu_register_machine(&pc_i440fx_machine_v1_5);
- qemu_register_machine(&pc_i440fx_machine_v1_4);
- qemu_register_machine(&pc_machine_v1_3);
- qemu_register_machine(&pc_machine_v1_2);
- qemu_register_machine(&pc_machine_v1_1);
- qemu_register_machine(&pc_machine_v1_0);
- qemu_register_machine(&pc_machine_v0_15);
- qemu_register_machine(&pc_machine_v0_14);
- qemu_register_machine(&pc_machine_v0_13);
- qemu_register_machine(&pc_machine_v0_12);
- qemu_register_machine(&pc_machine_v0_11);
- qemu_register_machine(&pc_machine_v0_10);
- qemu_register_machine(&isapc_machine);
+ qemu_register_pc_machine(&pc_i440fx_machine_v2_1);
+ qemu_register_pc_machine(&pc_i440fx_machine_v2_0);
+ qemu_register_pc_machine(&pc_i440fx_machine_v1_7);
+ qemu_register_pc_machine(&pc_i440fx_machine_v1_6);
+ qemu_register_pc_machine(&pc_i440fx_machine_v1_5);
+ qemu_register_pc_machine(&pc_i440fx_machine_v1_4);
+ qemu_register_pc_machine(&pc_machine_v1_3);
+ qemu_register_pc_machine(&pc_machine_v1_2);
+ qemu_register_pc_machine(&pc_machine_v1_1);
+ qemu_register_pc_machine(&pc_machine_v1_0);
+ qemu_register_pc_machine(&pc_machine_v0_15);
+ qemu_register_pc_machine(&pc_machine_v0_14);
+ qemu_register_pc_machine(&pc_machine_v0_13);
+ qemu_register_pc_machine(&pc_machine_v0_12);
+ qemu_register_pc_machine(&pc_machine_v0_11);
+ qemu_register_pc_machine(&pc_machine_v0_10);
+ qemu_register_pc_machine(&isapc_machine);
#ifdef CONFIG_XEN
- qemu_register_machine(&xenfv_machine);
+ qemu_register_pc_machine(&xenfv_machine);
#endif
}
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index b3c02c163d..aa71332ee1 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -57,10 +57,12 @@ static bool smbios_legacy_mode;
* pages in the host.
*/
static bool gigabyte_align = true;
+static bool has_reserved_memory = true;
/* PC hardware initialisation */
static void pc_q35_init(MachineState *machine)
{
+ PCMachineState *pc_machine = PC_MACHINE(machine);
ram_addr_t below_4g_mem_size, above_4g_mem_size;
Q35PCIHost *q35_host;
PCIHostState *phb;
@@ -130,6 +132,7 @@ static void pc_q35_init(MachineState *machine)
guest_info->has_pci_info = has_pci_info;
guest_info->isapc_ram_fw = false;
guest_info->has_acpi_build = has_acpi_build;
+ guest_info->has_reserved_memory = has_reserved_memory;
if (smbios_defaults) {
MachineClass *mc = MACHINE_GET_CLASS(machine);
@@ -140,9 +143,7 @@ static void pc_q35_init(MachineState *machine)
/* allocate ram and load rom/bios */
if (!xen_enabled()) {
- pc_memory_init(get_system_memory(),
- machine->kernel_filename, machine->kernel_cmdline,
- machine->initrd_filename,
+ pc_memory_init(machine, get_system_memory(),
below_4g_mem_size, above_4g_mem_size,
rom_memory, &ram_memory, guest_info);
}
@@ -176,6 +177,15 @@ static void pc_q35_init(MachineState *machine)
lpc = pci_create_simple_multifunction(host_bus, PCI_DEVFN(ICH9_LPC_DEV,
ICH9_LPC_FUNC), true,
TYPE_ICH9_LPC_DEVICE);
+
+ object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP,
+ TYPE_HOTPLUG_HANDLER,
+ (Object **)&pc_machine->acpi_dev,
+ object_property_allow_set_link,
+ OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort);
+ object_property_set_link(OBJECT(machine), OBJECT(lpc),
+ PC_MACHINE_ACPI_DEVICE_PROP, &error_abort);
+
ich9_lpc = ICH9_LPC_DEVICE(lpc);
ich9_lpc->pic = gsi;
ich9_lpc->ioapic = gsi_state->ioapic_irq;
@@ -245,6 +255,7 @@ static void pc_q35_init(MachineState *machine)
static void pc_compat_2_0(MachineState *machine)
{
smbios_legacy_mode = true;
+ has_reserved_memory = false;
}
static void pc_compat_1_7(MachineState *machine)
@@ -384,12 +395,12 @@ static QEMUMachine pc_q35_machine_v1_4 = {
static void pc_q35_machine_init(void)
{
- qemu_register_machine(&pc_q35_machine_v2_1);
- qemu_register_machine(&pc_q35_machine_v2_0);
- qemu_register_machine(&pc_q35_machine_v1_7);
- qemu_register_machine(&pc_q35_machine_v1_6);
- qemu_register_machine(&pc_q35_machine_v1_5);
- qemu_register_machine(&pc_q35_machine_v1_4);
+ qemu_register_pc_machine(&pc_q35_machine_v2_1);
+ qemu_register_pc_machine(&pc_q35_machine_v2_0);
+ qemu_register_pc_machine(&pc_q35_machine_v1_7);
+ qemu_register_pc_machine(&pc_q35_machine_v1_6);
+ qemu_register_pc_machine(&pc_q35_machine_v1_5);
+ qemu_register_pc_machine(&pc_q35_machine_v1_4);
}
machine_init(pc_q35_machine_init);
diff --git a/hw/i386/q35-acpi-dsdt.dsl b/hw/i386/q35-acpi-dsdt.dsl
index f4d2a2daee..8c3eae73bf 100644
--- a/hw/i386/q35-acpi-dsdt.dsl
+++ b/hw/i386/q35-acpi-dsdt.dsl
@@ -402,7 +402,7 @@ DefinitionBlock (
define_gsi_link(GSIH, 0, 0x17)
}
-#include "hw/acpi/cpu_hotplug_defs.h"
+#include "hw/acpi/pc-hotplug.h"
#define CPU_STATUS_BASE ICH9_CPU_HOTPLUG_IO_BASE
#include "acpi-dsdt-cpu-hotplug.dsl"
@@ -410,6 +410,7 @@ DefinitionBlock (
/****************************************************************
* General purpose events
****************************************************************/
+ External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)
Scope(\_GPE) {
Name(_HID, "ACPI0006")
@@ -422,7 +423,9 @@ DefinitionBlock (
// CPU hotplug event
\_SB.PRSC()
}
- Method(_L03) {
+ Method(_E03) {
+ // Memory hotplug event
+ \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
}
Method(_L04) {
}
diff --git a/hw/i386/q35-acpi-dsdt.hex.generated b/hw/i386/q35-acpi-dsdt.hex.generated
index 6b788c9be0..c9eb4ac6ad 100644
--- a/hw/i386/q35-acpi-dsdt.hex.generated
+++ b/hw/i386/q35-acpi-dsdt.hex.generated
@@ -3,12 +3,12 @@ static unsigned char Q35AcpiDsdtAmlCode[] = {
0x53,
0x44,
0x54,
-0xd2,
+0xe5,
0x1c,
0x0,
0x0,
0x1,
-0x13,
+0xb7,
0x42,
0x58,
0x50,
@@ -7234,8 +7234,8 @@ static unsigned char Q35AcpiDsdtAmlCode[] = {
0xa,
0xb,
0x10,
-0x4f,
-0x8,
+0x42,
+0xa,
0x5f,
0x47,
0x50,
@@ -7287,12 +7287,31 @@ static unsigned char Q35AcpiDsdtAmlCode[] = {
0x53,
0x43,
0x14,
-0x6,
+0x19,
0x5f,
-0x4c,
+0x45,
0x30,
0x33,
0x0,
+0x5c,
+0x2f,
+0x4,
+0x5f,
+0x53,
+0x42,
+0x5f,
+0x50,
+0x43,
+0x49,
+0x30,
+0x4d,
+0x48,
+0x50,
+0x44,
+0x4d,
+0x53,
+0x43,
+0x4e,
0x14,
0x6,
0x5f,
diff --git a/hw/i386/ssdt-mem.dsl b/hw/i386/ssdt-mem.dsl
new file mode 100644
index 0000000000..8e17bd1f97
--- /dev/null
+++ b/hw/i386/ssdt-mem.dsl
@@ -0,0 +1,77 @@
+/*
+ * Memory hotplug ACPI DSDT static objects definitions
+ *
+ * Copyright ProfitBricks GmbH 2012
+ * Copyright (C) 2013-2014 Red Hat Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+/* This file is the basis for the ssdt_mem[] variable in src/acpi.c.
+ * It defines the contents of the memory device object. At
+ * runtime, a dynamically generated SSDT will contain one copy of this
+ * AML snippet for every possible memory device in the system. The
+ * objects will be placed in the \_SB_ namespace.
+ *
+ * In addition to the aml code generated from this file, the
+ * src/acpi.c file creates a MTFY method with an entry for each memdevice:
+ * Method(MTFY, 2) {
+ * If (LEqual(Arg0, 0x00)) { Notify(MP00, Arg1) }
+ * If (LEqual(Arg0, 0x01)) { Notify(MP01, Arg1) }
+ * ...
+ * }
+ */
+#include "hw/acpi/pc-hotplug.h"
+
+ACPI_EXTRACT_ALL_CODE ssdm_mem_aml
+
+DefinitionBlock ("ssdt-mem.aml", "SSDT", 0x02, "BXPC", "CSSDT", 0x1)
+{
+
+ External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD, MethodObj)
+ External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD, MethodObj)
+ External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD, MethodObj)
+ External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD, MethodObj)
+
+ Scope(\_SB) {
+/* v------------------ DO NOT EDIT ------------------v */
+ ACPI_EXTRACT_DEVICE_START ssdt_mem_start
+ ACPI_EXTRACT_DEVICE_END ssdt_mem_end
+ ACPI_EXTRACT_DEVICE_STRING ssdt_mem_name
+ Device(MPAA) {
+ ACPI_EXTRACT_NAME_STRING ssdt_mem_id
+ Name(_UID, "0xAA")
+/* ^------------------ DO NOT EDIT ------------------^
+ * Don't change the above without also updating the C code.
+ */
+ Name(_HID, EISAID("PNP0C80"))
+
+ Method(_CRS, 0) {
+ Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD(_UID))
+ }
+
+ Method(_STA, 0) {
+ Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD(_UID))
+ }
+
+ Method(_PXM, 0) {
+ Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD(_UID))
+ }
+
+ Method(_OST, 3) {
+ \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD(_UID, Arg0, Arg1, Arg2)
+ }
+ }
+ }
+}
diff --git a/hw/i386/ssdt-mem.hex.generated b/hw/i386/ssdt-mem.hex.generated
new file mode 100644
index 0000000000..00bd34d269
--- /dev/null
+++ b/hw/i386/ssdt-mem.hex.generated
@@ -0,0 +1,213 @@
+static unsigned char ssdt_mem_id[] = {
+0x35
+};
+static unsigned char ssdm_mem_aml[] = {
+0x53,
+0x53,
+0x44,
+0x54,
+0xc7,
+0x0,
+0x0,
+0x0,
+0x2,
+0x71,
+0x42,
+0x58,
+0x50,
+0x43,
+0x0,
+0x0,
+0x43,
+0x53,
+0x53,
+0x44,
+0x54,
+0x0,
+0x0,
+0x0,
+0x1,
+0x0,
+0x0,
+0x0,
+0x49,
+0x4e,
+0x54,
+0x4c,
+0x15,
+0x11,
+0x13,
+0x20,
+0x10,
+0x42,
+0xa,
+0x5c,
+0x5f,
+0x53,
+0x42,
+0x5f,
+0x5b,
+0x82,
+0x49,
+0x9,
+0x4d,
+0x50,
+0x41,
+0x41,
+0x8,
+0x5f,
+0x55,
+0x49,
+0x44,
+0xd,
+0x30,
+0x78,
+0x41,
+0x41,
+0x0,
+0x8,
+0x5f,
+0x48,
+0x49,
+0x44,
+0xc,
+0x41,
+0xd0,
+0xc,
+0x80,
+0x14,
+0x1e,
+0x5f,
+0x43,
+0x52,
+0x53,
+0x0,
+0xa4,
+0x5c,
+0x2f,
+0x4,
+0x5f,
+0x53,
+0x42,
+0x5f,
+0x50,
+0x43,
+0x49,
+0x30,
+0x4d,
+0x48,
+0x50,
+0x44,
+0x4d,
+0x43,
+0x52,
+0x53,
+0x5f,
+0x55,
+0x49,
+0x44,
+0x14,
+0x1e,
+0x5f,
+0x53,
+0x54,
+0x41,
+0x0,
+0xa4,
+0x5c,
+0x2f,
+0x4,
+0x5f,
+0x53,
+0x42,
+0x5f,
+0x50,
+0x43,
+0x49,
+0x30,
+0x4d,
+0x48,
+0x50,
+0x44,
+0x4d,
+0x52,
+0x53,
+0x54,
+0x5f,
+0x55,
+0x49,
+0x44,
+0x14,
+0x1e,
+0x5f,
+0x50,
+0x58,
+0x4d,
+0x0,
+0xa4,
+0x5c,
+0x2f,
+0x4,
+0x5f,
+0x53,
+0x42,
+0x5f,
+0x50,
+0x43,
+0x49,
+0x30,
+0x4d,
+0x48,
+0x50,
+0x44,
+0x4d,
+0x50,
+0x58,
+0x4d,
+0x5f,
+0x55,
+0x49,
+0x44,
+0x14,
+0x20,
+0x5f,
+0x4f,
+0x53,
+0x54,
+0x3,
+0x5c,
+0x2f,
+0x4,
+0x5f,
+0x53,
+0x42,
+0x5f,
+0x50,
+0x43,
+0x49,
+0x30,
+0x4d,
+0x48,
+0x50,
+0x44,
+0x4d,
+0x4f,
+0x53,
+0x54,
+0x5f,
+0x55,
+0x49,
+0x44,
+0x68,
+0x69,
+0x6a
+};
+static unsigned char ssdt_mem_start[] = {
+0x2c
+};
+static unsigned char ssdt_mem_end[] = {
+0xc7
+};
+static unsigned char ssdt_mem_name[] = {
+0x30
+};
diff --git a/hw/i386/ssdt-misc.dsl b/hw/i386/ssdt-misc.dsl
index a4484b8176..d329b8ba57 100644
--- a/hw/i386/ssdt-misc.dsl
+++ b/hw/i386/ssdt-misc.dsl
@@ -12,6 +12,7 @@
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "hw/acpi/pc-hotplug.h"
ACPI_EXTRACT_ALL_CODE ssdp_misc_aml
@@ -116,4 +117,167 @@ DefinitionBlock ("ssdt-misc.aml", "SSDT", 0x01, "BXPC", "BXSSDTSUSP", 0x1)
}
}
}
+
+ External(MEMORY_SLOT_NOTIFY_METHOD, MethodObj)
+ Scope(\_SB.PCI0) {
+ Device(MEMORY_HOPTLUG_DEVICE) {
+ Name(_HID, "PNP0A06")
+ Name(_UID, "Memory hotplug resources")
+
+ ACPI_EXTRACT_NAME_DWORD_CONST ssdt_mctrl_nr_slots
+ Name(MEMORY_SLOTS_NUMBER, 0x12345678)
+
+ /* Memory hotplug IO registers */
+ OperationRegion(MEMORY_HOTPLUG_IO_REGION, SystemIO,
+ ACPI_MEMORY_HOTPLUG_BASE,
+ ACPI_MEMORY_HOTPLUG_IO_LEN)
+
+ Name(_CRS, ResourceTemplate() {
+ IO(Decode16, ACPI_MEMORY_HOTPLUG_BASE, ACPI_MEMORY_HOTPLUG_BASE,
+ 0, ACPI_MEMORY_HOTPLUG_IO_LEN, IO)
+ })
+
+ Method(_STA, 0) {
+ If (LEqual(MEMORY_SLOTS_NUMBER, Zero)) {
+ Return(0x0)
+ }
+ /* present, functioning, decoding, not shown in UI */
+ Return(0xB)
+ }
+
+ Field(MEMORY_HOTPLUG_IO_REGION, DWordAcc, NoLock, Preserve) {
+ MEMORY_SLOT_ADDR_LOW, 32, // read only
+ MEMORY_SLOT_ADDR_HIGH, 32, // read only
+ MEMORY_SLOT_SIZE_LOW, 32, // read only
+ MEMORY_SLOT_SIZE_HIGH, 32, // read only
+ MEMORY_SLOT_PROXIMITY, 32, // read only
+ }
+ Field(MEMORY_HOTPLUG_IO_REGION, ByteAcc, NoLock, Preserve) {
+ Offset(20),
+ MEMORY_SLOT_ENABLED, 1, // 1 if enabled, read only
+ MEMORY_SLOT_INSERT_EVENT, 1, // (read) 1 if there is an insert event. (write) 1 to clear event
+ }
+
+ Mutex (MEMORY_SLOT_LOCK, 0)
+ Field (MEMORY_HOTPLUG_IO_REGION, DWordAcc, NoLock, Preserve) {
+ MEMORY_SLOT_SLECTOR, 32, // DIMM selector, write only
+ MEMORY_SLOT_OST_EVENT, 32, // _OST event code, write only
+ MEMORY_SLOT_OST_STATUS, 32, // _OST status code, write only
+ }
+
+ Method(MEMORY_SLOT_SCAN_METHOD, 0) {
+ If (LEqual(MEMORY_SLOTS_NUMBER, Zero)) {
+ Return(Zero)
+ }
+
+ Store(Zero, Local0) // Mem devs iterator
+ Acquire(MEMORY_SLOT_LOCK, 0xFFFF)
+ while (LLess(Local0, MEMORY_SLOTS_NUMBER)) {
+ Store(Local0, MEMORY_SLOT_SLECTOR) // select Local0 DIMM
+ If (LEqual(MEMORY_SLOT_INSERT_EVENT, One)) { // Memory device needs check
+ MEMORY_SLOT_NOTIFY_METHOD(Local0, 1)
+ Store(1, MEMORY_SLOT_INSERT_EVENT)
+ }
+ // TODO: handle memory eject request
+ Add(Local0, One, Local0) // goto next DIMM
+ }
+ Release(MEMORY_SLOT_LOCK)
+ Return(One)
+ }
+
+ Method(MEMORY_SLOT_STATUS_METHOD, 1) {
+ Store(Zero, Local0)
+
+ Acquire(MEMORY_SLOT_LOCK, 0xFFFF)
+ Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM
+
+ If (LEqual(MEMORY_SLOT_ENABLED, One)) {
+ Store(0xF, Local0)
+ }
+
+ Release(MEMORY_SLOT_LOCK)
+ Return(Local0)
+ }
+
+ Method(MEMORY_SLOT_CRS_METHOD, 1, Serialized) {
+ Acquire(MEMORY_SLOT_LOCK, 0xFFFF)
+ Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM
+
+ Name(MR64, ResourceTemplate() {
+ QWordMemory(ResourceProducer, PosDecode, MinFixed, MaxFixed,
+ Cacheable, ReadWrite,
+ 0x0000000000000000, // Address Space Granularity
+ 0x0000000000000000, // Address Range Minimum
+ 0xFFFFFFFFFFFFFFFE, // Address Range Maximum
+ 0x0000000000000000, // Address Translation Offset
+ 0xFFFFFFFFFFFFFFFF, // Address Length
+ ,, MW64, AddressRangeMemory, TypeStatic)
+ })
+
+ CreateDWordField(MR64, 14, MINL)
+ CreateDWordField(MR64, 18, MINH)
+ CreateDWordField(MR64, 38, LENL)
+ CreateDWordField(MR64, 42, LENH)
+ CreateDWordField(MR64, 22, MAXL)
+ CreateDWordField(MR64, 26, MAXH)
+
+ Store(MEMORY_SLOT_ADDR_HIGH, MINH)
+ Store(MEMORY_SLOT_ADDR_LOW, MINL)
+ Store(MEMORY_SLOT_SIZE_HIGH, LENH)
+ Store(MEMORY_SLOT_SIZE_LOW, LENL)
+
+ // 64-bit math: MAX = MIN + LEN - 1
+ Add(MINL, LENL, MAXL)
+ Add(MINH, LENH, MAXH)
+ If (LLess(MAXL, MINL)) {
+ Add(MAXH, One, MAXH)
+ }
+ If (LLess(MAXL, One)) {
+ Subtract(MAXH, One, MAXH)
+ }
+ Subtract(MAXL, One, MAXL)
+
+ If (LEqual(MAXH, Zero)){
+ Name(MR32, ResourceTemplate() {
+ DWordMemory(ResourceProducer, PosDecode, MinFixed, MaxFixed,
+ Cacheable, ReadWrite,
+ 0x00000000, // Address Space Granularity
+ 0x00000000, // Address Range Minimum
+ 0xFFFFFFFE, // Address Range Maximum
+ 0x00000000, // Address Translation Offset
+ 0xFFFFFFFF, // Address Length
+ ,, MW32, AddressRangeMemory, TypeStatic)
+ })
+ CreateDWordField(MR32, MW32._MIN, MIN)
+ CreateDWordField(MR32, MW32._MAX, MAX)
+ CreateDWordField(MR32, MW32._LEN, LEN)
+ Store(MINL, MIN)
+ Store(MAXL, MAX)
+ Store(LENL, LEN)
+
+ Release(MEMORY_SLOT_LOCK)
+ Return(MR32)
+ }
+
+ Release(MEMORY_SLOT_LOCK)
+ Return(MR64)
+ }
+
+ Method(MEMORY_SLOT_PROXIMITY_METHOD, 1) {
+ Acquire(MEMORY_SLOT_LOCK, 0xFFFF)
+ Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM
+ Store(MEMORY_SLOT_PROXIMITY, Local0)
+ Release(MEMORY_SLOT_LOCK)
+ Return(Local0)
+ }
+
+ Method(MEMORY_SLOT_OST_METHOD, 4) {
+ Acquire(MEMORY_SLOT_LOCK, 0xFFFF)
+ Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM
+ Store(Arg1, MEMORY_SLOT_OST_EVENT)
+ Store(Arg2, MEMORY_SLOT_OST_STATUS)
+ Release(MEMORY_SLOT_LOCK)
+ }
+ } // Device()
+ } // Scope()
}
diff --git a/hw/i386/ssdt-misc.hex.generated b/hw/i386/ssdt-misc.hex.generated
index 55e3bd2aa6..ba4268a60b 100644
--- a/hw/i386/ssdt-misc.hex.generated
+++ b/hw/i386/ssdt-misc.hex.generated
@@ -4,6 +4,9 @@ static unsigned char acpi_pci64_length[] = {
static unsigned char acpi_s4_pkg[] = {
0x8f
};
+static unsigned short ssdt_mctrl_nr_slots[] = {
+0x1aa
+};
static unsigned char acpi_s3_name[] = {
0x7c
};
@@ -18,12 +21,12 @@ static unsigned char ssdp_misc_aml[] = {
0x53,
0x44,
0x54,
-0x62,
-0x1,
+0x7e,
+0x4,
0x0,
0x0,
0x1,
-0x76,
+0x8b,
0x42,
0x58,
0x50,
@@ -46,8 +49,8 @@ static unsigned char ssdp_misc_aml[] = {
0x4e,
0x54,
0x4c,
-0x23,
-0x8,
+0x15,
+0x11,
0x13,
0x20,
0x10,
@@ -367,7 +370,803 @@ static unsigned char ssdp_misc_aml[] = {
0x49,
0x4f,
0x4d,
-0x58
+0x58,
+0x10,
+0x4b,
+0x31,
+0x5c,
+0x2e,
+0x5f,
+0x53,
+0x42,
+0x5f,
+0x50,
+0x43,
+0x49,
+0x30,
+0x5b,
+0x82,
+0x4d,
+0x30,
+0x4d,
+0x48,
+0x50,
+0x44,
+0x8,
+0x5f,
+0x48,
+0x49,
+0x44,
+0xd,
+0x50,
+0x4e,
+0x50,
+0x30,
+0x41,
+0x30,
+0x36,
+0x0,
+0x8,
+0x5f,
+0x55,
+0x49,
+0x44,
+0xd,
+0x4d,
+0x65,
+0x6d,
+0x6f,
+0x72,
+0x79,
+0x20,
+0x68,
+0x6f,
+0x74,
+0x70,
+0x6c,
+0x75,
+0x67,
+0x20,
+0x72,
+0x65,
+0x73,
+0x6f,
+0x75,
+0x72,
+0x63,
+0x65,
+0x73,
+0x0,
+0x8,
+0x4d,
+0x44,
+0x4e,
+0x52,
+0xc,
+0x78,
+0x56,
+0x34,
+0x12,
+0x5b,
+0x80,
+0x48,
+0x50,
+0x4d,
+0x52,
+0x1,
+0xb,
+0x0,
+0xa,
+0xa,
+0x18,
+0x8,
+0x5f,
+0x43,
+0x52,
+0x53,
+0x11,
+0xd,
+0xa,
+0xa,
+0x47,
+0x1,
+0x0,
+0xa,
+0x0,
+0xa,
+0x0,
+0x18,
+0x79,
+0x0,
+0x14,
+0x13,
+0x5f,
+0x53,
+0x54,
+0x41,
+0x0,
+0xa0,
+0x9,
+0x93,
+0x4d,
+0x44,
+0x4e,
+0x52,
+0x0,
+0xa4,
+0x0,
+0xa4,
+0xa,
+0xb,
+0x5b,
+0x81,
+0x1f,
+0x48,
+0x50,
+0x4d,
+0x52,
+0x3,
+0x4d,
+0x52,
+0x42,
+0x4c,
+0x20,
+0x4d,
+0x52,
+0x42,
+0x48,
+0x20,
+0x4d,
+0x52,
+0x4c,
+0x4c,
+0x20,
+0x4d,
+0x52,
+0x4c,
+0x48,
+0x20,
+0x4d,
+0x50,
+0x58,
+0x5f,
+0x20,
+0x5b,
+0x81,
+0x13,
+0x48,
+0x50,
+0x4d,
+0x52,
+0x1,
+0x0,
+0x40,
+0xa,
+0x4d,
+0x45,
+0x53,
+0x5f,
+0x1,
+0x4d,
+0x49,
+0x4e,
+0x53,
+0x1,
+0x5b,
+0x1,
+0x4d,
+0x4c,
+0x43,
+0x4b,
+0x0,
+0x5b,
+0x81,
+0x15,
+0x48,
+0x50,
+0x4d,
+0x52,
+0x3,
+0x4d,
+0x53,
+0x45,
+0x4c,
+0x20,
+0x4d,
+0x4f,
+0x45,
+0x56,
+0x20,
+0x4d,
+0x4f,
+0x53,
+0x43,
+0x20,
+0x14,
+0x4a,
+0x4,
+0x4d,
+0x53,
+0x43,
+0x4e,
+0x0,
+0xa0,
+0x9,
+0x93,
+0x4d,
+0x44,
+0x4e,
+0x52,
+0x0,
+0xa4,
+0x0,
+0x70,
+0x0,
+0x60,
+0x5b,
+0x23,
+0x4d,
+0x4c,
+0x43,
+0x4b,
+0xff,
+0xff,
+0xa2,
+0x25,
+0x95,
+0x60,
+0x4d,
+0x44,
+0x4e,
+0x52,
+0x70,
+0x60,
+0x4d,
+0x53,
+0x45,
+0x4c,
+0xa0,
+0x13,
+0x93,
+0x4d,
+0x49,
+0x4e,
+0x53,
+0x1,
+0x4d,
+0x54,
+0x46,
+0x59,
+0x60,
+0x1,
+0x70,
+0x1,
+0x4d,
+0x49,
+0x4e,
+0x53,
+0x72,
+0x60,
+0x1,
+0x60,
+0x5b,
+0x27,
+0x4d,
+0x4c,
+0x43,
+0x4b,
+0xa4,
+0x1,
+0x14,
+0x2d,
+0x4d,
+0x52,
+0x53,
+0x54,
+0x1,
+0x70,
+0x0,
+0x60,
+0x5b,
+0x23,
+0x4d,
+0x4c,
+0x43,
+0x4b,
+0xff,
+0xff,
+0x70,
+0x99,
+0x68,
+0x0,
+0x4d,
+0x53,
+0x45,
+0x4c,
+0xa0,
+0xb,
+0x93,
+0x4d,
+0x45,
+0x53,
+0x5f,
+0x1,
+0x70,
+0xa,
+0xf,
+0x60,
+0x5b,
+0x27,
+0x4d,
+0x4c,
+0x43,
+0x4b,
+0xa4,
+0x60,
+0x14,
+0x41,
+0x18,
+0x4d,
+0x43,
+0x52,
+0x53,
+0x9,
+0x5b,
+0x23,
+0x4d,
+0x4c,
+0x43,
+0x4b,
+0xff,
+0xff,
+0x70,
+0x99,
+0x68,
+0x0,
+0x4d,
+0x53,
+0x45,
+0x4c,
+0x8,
+0x4d,
+0x52,
+0x36,
+0x34,
+0x11,
+0x33,
+0xa,
+0x30,
+0x8a,
+0x2b,
+0x0,
+0x0,
+0xc,
+0x3,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0xfe,
+0xff,
+0xff,
+0xff,
+0xff,
+0xff,
+0xff,
+0xff,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0xff,
+0xff,
+0xff,
+0xff,
+0xff,
+0xff,
+0xff,
+0xff,
+0x79,
+0x0,
+0x8a,
+0x4d,
+0x52,
+0x36,
+0x34,
+0xa,
+0xe,
+0x4d,
+0x49,
+0x4e,
+0x4c,
+0x8a,
+0x4d,
+0x52,
+0x36,
+0x34,
+0xa,
+0x12,
+0x4d,
+0x49,
+0x4e,
+0x48,
+0x8a,
+0x4d,
+0x52,
+0x36,
+0x34,
+0xa,
+0x26,
+0x4c,
+0x45,
+0x4e,
+0x4c,
+0x8a,
+0x4d,
+0x52,
+0x36,
+0x34,
+0xa,
+0x2a,
+0x4c,
+0x45,
+0x4e,
+0x48,
+0x8a,
+0x4d,
+0x52,
+0x36,
+0x34,
+0xa,
+0x16,
+0x4d,
+0x41,
+0x58,
+0x4c,
+0x8a,
+0x4d,
+0x52,
+0x36,
+0x34,
+0xa,
+0x1a,
+0x4d,
+0x41,
+0x58,
+0x48,
+0x70,
+0x4d,
+0x52,
+0x42,
+0x48,
+0x4d,
+0x49,
+0x4e,
+0x48,
+0x70,
+0x4d,
+0x52,
+0x42,
+0x4c,
+0x4d,
+0x49,
+0x4e,
+0x4c,
+0x70,
+0x4d,
+0x52,
+0x4c,
+0x48,
+0x4c,
+0x45,
+0x4e,
+0x48,
+0x70,
+0x4d,
+0x52,
+0x4c,
+0x4c,
+0x4c,
+0x45,
+0x4e,
+0x4c,
+0x72,
+0x4d,
+0x49,
+0x4e,
+0x4c,
+0x4c,
+0x45,
+0x4e,
+0x4c,
+0x4d,
+0x41,
+0x58,
+0x4c,
+0x72,
+0x4d,
+0x49,
+0x4e,
+0x48,
+0x4c,
+0x45,
+0x4e,
+0x48,
+0x4d,
+0x41,
+0x58,
+0x48,
+0xa0,
+0x14,
+0x95,
+0x4d,
+0x41,
+0x58,
+0x4c,
+0x4d,
+0x49,
+0x4e,
+0x4c,
+0x72,
+0x4d,
+0x41,
+0x58,
+0x48,
+0x1,
+0x4d,
+0x41,
+0x58,
+0x48,
+0xa0,
+0x11,
+0x95,
+0x4d,
+0x41,
+0x58,
+0x4c,
+0x1,
+0x74,
+0x4d,
+0x41,
+0x58,
+0x48,
+0x1,
+0x4d,
+0x41,
+0x58,
+0x48,
+0x74,
+0x4d,
+0x41,
+0x58,
+0x4c,
+0x1,
+0x4d,
+0x41,
+0x58,
+0x4c,
+0xa0,
+0x44,
+0x7,
+0x93,
+0x4d,
+0x41,
+0x58,
+0x48,
+0x0,
+0x8,
+0x4d,
+0x52,
+0x33,
+0x32,
+0x11,
+0x1f,
+0xa,
+0x1c,
+0x87,
+0x17,
+0x0,
+0x0,
+0xc,
+0x3,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0x0,
+0xfe,
+0xff,
+0xff,
+0xff,
+0x0,
+0x0,
+0x0,
+0x0,
+0xff,
+0xff,
+0xff,
+0xff,
+0x79,
+0x0,
+0x8a,
+0x4d,
+0x52,
+0x33,
+0x32,
+0xa,
+0xa,
+0x4d,
+0x49,
+0x4e,
+0x5f,
+0x8a,
+0x4d,
+0x52,
+0x33,
+0x32,
+0xa,
+0xe,
+0x4d,
+0x41,
+0x58,
+0x5f,
+0x8a,
+0x4d,
+0x52,
+0x33,
+0x32,
+0xa,
+0x16,
+0x4c,
+0x45,
+0x4e,
+0x5f,
+0x70,
+0x4d,
+0x49,
+0x4e,
+0x4c,
+0x4d,
+0x49,
+0x4e,
+0x5f,
+0x70,
+0x4d,
+0x41,
+0x58,
+0x4c,
+0x4d,
+0x41,
+0x58,
+0x5f,
+0x70,
+0x4c,
+0x45,
+0x4e,
+0x4c,
+0x4c,
+0x45,
+0x4e,
+0x5f,
+0x5b,
+0x27,
+0x4d,
+0x4c,
+0x43,
+0x4b,
+0xa4,
+0x4d,
+0x52,
+0x33,
+0x32,
+0x5b,
+0x27,
+0x4d,
+0x4c,
+0x43,
+0x4b,
+0xa4,
+0x4d,
+0x52,
+0x36,
+0x34,
+0x14,
+0x24,
+0x4d,
+0x50,
+0x58,
+0x4d,
+0x1,
+0x5b,
+0x23,
+0x4d,
+0x4c,
+0x43,
+0x4b,
+0xff,
+0xff,
+0x70,
+0x99,
+0x68,
+0x0,
+0x4d,
+0x53,
+0x45,
+0x4c,
+0x70,
+0x4d,
+0x50,
+0x58,
+0x5f,
+0x60,
+0x5b,
+0x27,
+0x4d,
+0x4c,
+0x43,
+0x4b,
+0xa4,
+0x60,
+0x14,
+0x28,
+0x4d,
+0x4f,
+0x53,
+0x54,
+0x4,
+0x5b,
+0x23,
+0x4d,
+0x4c,
+0x43,
+0x4b,
+0xff,
+0xff,
+0x70,
+0x99,
+0x68,
+0x0,
+0x4d,
+0x53,
+0x45,
+0x4c,
+0x70,
+0x69,
+0x4d,
+0x4f,
+0x45,
+0x56,
+0x70,
+0x6a,
+0x4d,
+0x4f,
+0x53,
+0x43,
+0x5b,
+0x27,
+0x4d,
+0x4c,
+0x43,
+0x4b
};
static unsigned char ssdt_isa_pest[] = {
0xd0
diff --git a/hw/i386/ssdt-pcihp.hex.generated b/hw/i386/ssdt-pcihp.hex.generated
index b599b4663c..72ffa84800 100644
--- a/hw/i386/ssdt-pcihp.hex.generated
+++ b/hw/i386/ssdt-pcihp.hex.generated
@@ -32,7 +32,7 @@ static unsigned char ssdp_pcihp_aml[] = {
0x0,
0x0,
0x1,
-0x6b,
+0x70,
0x42,
0x58,
0x50,
@@ -55,8 +55,8 @@ static unsigned char ssdp_pcihp_aml[] = {
0x4e,
0x54,
0x4c,
-0x23,
-0x8,
+0x15,
+0x11,
0x13,
0x20,
0x10,
diff --git a/hw/i386/ssdt-proc.hex.generated b/hw/i386/ssdt-proc.hex.generated
index 97e28d4820..4df0734c79 100644
--- a/hw/i386/ssdt-proc.hex.generated
+++ b/hw/i386/ssdt-proc.hex.generated
@@ -11,7 +11,7 @@ static unsigned char ssdp_proc_aml[] = {
0x0,
0x0,
0x1,
-0x78,
+0x7d,
0x42,
0x58,
0x50,
@@ -34,8 +34,8 @@ static unsigned char ssdp_proc_aml[] = {
0x4e,
0x54,
0x4c,
-0x23,
-0x8,
+0x15,
+0x11,
0x13,
0x20,
0x5b,
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 97f69d6001..b846d81990 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -563,7 +563,14 @@ static void ich9_lpc_add_properties(ICH9LPCState *lpc)
ich9_pm_add_properties(OBJECT(lpc), &lpc->pm, NULL);
}
-static int ich9_lpc_initfn(PCIDevice *d)
+static void ich9_lpc_initfn(Object *obj)
+{
+ ICH9LPCState *lpc = ICH9_LPC_DEVICE(obj);
+
+ ich9_lpc_add_properties(lpc);
+}
+
+static int ich9_lpc_init(PCIDevice *d)
{
ICH9LPCState *lpc = ICH9_LPC_DEVICE(d);
ISABus *isa_bus;
@@ -589,10 +596,22 @@ static int ich9_lpc_initfn(PCIDevice *d)
memory_region_add_subregion_overlap(pci_address_space_io(d),
ICH9_RST_CNT_IOPORT, &lpc->rst_cnt_mem,
1);
+ return 0;
+}
- ich9_lpc_add_properties(lpc);
+static void ich9_device_plug_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev);
- return 0;
+ ich9_pm_device_plug_cb(&lpc->pm, dev, errp);
+}
+
+static void ich9_device_unplug_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ error_setg(errp, "acpi: device unplug request for not supported device"
+ " type: %s", object_get_typename(OBJECT(dev)));
}
static bool ich9_rst_cnt_needed(void *opaque)
@@ -638,10 +657,12 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
+ AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(klass);
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
dc->reset = ich9_lpc_reset;
- k->init = ich9_lpc_initfn;
+ k->init = ich9_lpc_init;
dc->vmsd = &vmstate_ich9_lpc;
k->config_write = ich9_lpc_config_write;
dc->desc = "ICH9 LPC bridge";
@@ -654,13 +675,22 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data)
* pc_q35_init()
*/
dc->cannot_instantiate_with_device_add_yet = true;
+ hc->plug = ich9_device_plug_cb;
+ hc->unplug = ich9_device_unplug_cb;
+ adevc->ospm_status = ich9_pm_ospm_status;
}
static const TypeInfo ich9_lpc_info = {
.name = TYPE_ICH9_LPC_DEVICE,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(struct ICH9LPCState),
+ .instance_init = ich9_lpc_initfn,
.class_init = ich9_lpc_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_HOTPLUG_HANDLER },
+ { TYPE_ACPI_DEVICE_IF },
+ { }
+ }
};
static void ich9_lpc_register(void)
diff --git a/hw/mem/Makefile.objs b/hw/mem/Makefile.objs
new file mode 100644
index 0000000000..b000fb42bf
--- /dev/null
+++ b/hw/mem/Makefile.objs
@@ -0,0 +1 @@
+common-obj-$(CONFIG_MEM_HOTPLUG) += pc-dimm.o
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
new file mode 100644
index 0000000000..ad176b700b
--- /dev/null
+++ b/hw/mem/pc-dimm.c
@@ -0,0 +1,370 @@
+/*
+ * Dimm device for Memory Hotplug
+ *
+ * Copyright ProfitBricks GmbH 2012
+ * Copyright (C) 2014 Red Hat Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "hw/mem/pc-dimm.h"
+#include "qemu/config-file.h"
+#include "qapi/visitor.h"
+#include "qemu/range.h"
+
+/*
+ * object_child_foreach() callback that collects realized DIMMs for QMP.
+ *
+ * For every realized TYPE_PC_DIMM object reached from @obj, one list element
+ * describing it is appended to the caller's list. @opaque is a
+ * MemoryDeviceInfoList *** cursor: it points at the variable that holds the
+ * address of the list's tail link, and that variable is advanced after each
+ * append. The walk recurses into the children of @obj. Always returns 0.
+ */
+int qmp_pc_dimm_device_list(Object *obj, void *opaque)
+{
+    MemoryDeviceInfoList ***prev = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
+        DeviceState *dev = DEVICE(obj);
+
+        if (dev->realized) {
+            MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1);
+            MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
+            PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1);
+            DeviceClass *dc = DEVICE_GET_CLASS(obj);
+            PCDIMMDevice *dimm = PC_DIMM(obj);
+
+            if (dev->id) {
+                di->has_id = true;
+                di->id = g_strdup(dev->id);
+            }
+            di->hotplugged = dev->hotplugged;
+            di->hotpluggable = dc->hotpluggable;
+            di->addr = dimm->addr;
+            di->slot = dimm->slot;
+            di->node = dimm->node;
+            di->size = object_property_get_int(OBJECT(dimm), PC_DIMM_SIZE_PROP,
+                                               NULL);
+            di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem));
+
+            info->dimm = di;
+            elem->value = info;
+            elem->next = NULL;
+            **prev = elem;
+            *prev = &elem->next;
+        }
+    }
+
+    object_child_foreach(obj, qmp_pc_dimm_device_list, opaque);
+    return 0;
+}
+
+/*
+ * object_child_foreach() callback: for every realized TYPE_PC_DIMM object
+ * reached from @obj, set the bit of its slot in the bitmap passed through
+ * @opaque. Always returns 0.
+ */
+static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
+{
+    unsigned long *bitmap = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
+        DeviceState *dev = DEVICE(obj);
+        if (dev->realized) { /* count only realized DIMMs */
+            PCDIMMDevice *d = PC_DIMM(obj);
+            set_bit(d->slot, bitmap);
+        }
+    }
+
+    object_child_foreach(obj, pc_dimm_slot2bitmap, opaque);
+    return 0;
+}
+
+/*
+ * Pick a slot number for a new DIMM.
+ *
+ * @hint: requested slot, or NULL to take the lowest unused one
+ * @max_slots: number of slots; valid slot numbers are 0 .. @max_slots - 1
+ * @errp: set when the requested slot is out of range or busy, or when no
+ *        slot is free
+ *
+ * Returns the chosen slot; the value is meaningful only if no error was set.
+ */
+int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp)
+{
+    unsigned long *bitmap;
+    int slot = 0;
+
+    /* no slots at all: do not size a bitmap with a non-positive count */
+    if (max_slots <= 0) {
+        error_setg(errp, "no free slots available");
+        return slot;
+    }
+
+    bitmap = bitmap_new(max_slots);
+    object_child_foreach(qdev_get_machine(), pc_dimm_slot2bitmap, bitmap);
+
+    /* check if requested slot is not occupied */
+    if (hint) {
+        if (*hint < 0) {
+            /* a negative index must never reach test_bit() below */
+            error_setg(errp, "invalid slot# %d, should not be negative",
+                       *hint);
+        } else if (*hint >= max_slots) {
+            error_setg(errp, "invalid slot# %d, should be less than %d",
+                       *hint, max_slots);
+        } else if (!test_bit(*hint, bitmap)) {
+            slot = *hint;
+        } else {
+            error_setg(errp, "slot %d is busy", *hint);
+        }
+        goto out;
+    }
+
+    /* search for free slot */
+    slot = find_first_zero_bit(bitmap, max_slots);
+    if (slot == max_slots) {
+        error_setg(errp, "no free slots available");
+    }
+out:
+    g_free(bitmap);
+    return slot;
+}
+
+/* GCompareFunc that orders two DIMMs by ascending 'addr'. */
+static gint pc_dimm_addr_sort(gconstpointer a, gconstpointer b)
+{
+    PCDIMMDevice *x = PC_DIMM(a);
+    PCDIMMDevice *y = PC_DIMM(b);
+
+    /* 'addr' is a uint64 property: compare it directly, no subtraction */
+    if (x->addr < y->addr) {
+        return -1;
+    } else if (x->addr > y->addr) {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * object_child_foreach() callback: insert every realized TYPE_PC_DIMM object
+ * reached from @obj into the GSList passed through @opaque, keeping the list
+ * sorted by pc_dimm_addr_sort(). Always returns 0.
+ */
+static int pc_dimm_built_list(Object *obj, void *opaque)
+{
+    GSList **list = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
+        DeviceState *dev = DEVICE(obj);
+        if (dev->realized) { /* only realized DIMMs matter */
+            *list = g_slist_insert_sorted(*list, dev, pc_dimm_addr_sort);
+        }
+    }
+
+    object_child_foreach(obj, pc_dimm_built_list, opaque);
+    return 0;
+}
+
+/*
+ * Find a guest address for a new DIMM of @size bytes.
+ *
+ * @address_space_start: lowest address a DIMM may occupy
+ * @address_space_size: size of the area DIMMs may occupy
+ * @hint: requested address, or NULL to search upward from
+ *        @address_space_start
+ * @size: size of the new DIMM
+ * @errp: set when the size of an existing DIMM cannot be read, when @hint
+ *        overlaps a realized DIMM, or when the resulting range does not fit
+ *        into the area
+ *
+ * Returns the selected address (0 if the walk over existing DIMMs failed);
+ * the value is meaningful only if no error was set.
+ */
+uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
+                               uint64_t address_space_size,
+                               uint64_t *hint, uint64_t size,
+                               Error **errp)
+{
+    GSList *list = NULL, *item;
+    uint64_t new_addr, ret = 0;
+    uint64_t address_space_end = address_space_start + address_space_size;
+    Error *local_err = NULL;
+
+    assert(address_space_end > address_space_size);
+    object_child_foreach(qdev_get_machine(), pc_dimm_built_list, &list);
+
+    if (hint) {
+        new_addr = *hint;
+    } else {
+        new_addr = address_space_start;
+    }
+
+    /* find address range that will fit new DIMM */
+    for (item = list; item; item = g_slist_next(item)) {
+        PCDIMMDevice *dimm = item->data;
+        uint64_t dimm_size = object_property_get_int(OBJECT(dimm),
+                                                     PC_DIMM_SIZE_PROP,
+                                                     &local_err);
+        /* a local Error catches the failure even when errp is NULL */
+        if (local_err) {
+            error_propagate(errp, local_err);
+            goto out;
+        }
+
+        if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) {
+            if (hint) {
+                DeviceState *d = DEVICE(dimm);
+                error_setg(errp, "address range conflicts with '%s'", d->id);
+                goto out;
+            }
+            new_addr = dimm->addr + dimm_size;
+        }
+    }
+    ret = new_addr;
+
+    if (new_addr < address_space_start) {
+        error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64
+                   "] at 0x%" PRIx64, new_addr, size, address_space_start);
+    } else if (new_addr > address_space_end ||
+               size > address_space_end - new_addr) {
+        /* subtraction form: 'new_addr + size' could wrap around 2^64 */
+        error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64
+                   "] beyond 0x%" PRIx64, new_addr, size, address_space_end);
+    }
+
+out:
+    g_slist_free(list);
+    return ret;
+}
+
+static Property pc_dimm_properties[] = {
+    DEFINE_PROP_UINT64(PC_DIMM_ADDR_PROP, PCDIMMDevice, addr, 0),
+    DEFINE_PROP_UINT32(PC_DIMM_NODE_PROP, PCDIMMDevice, node, 0),
+    DEFINE_PROP_INT32(PC_DIMM_SLOT_PROP, PCDIMMDevice, slot,
+                      PC_DIMM_UNASSIGNED_SLOT),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Getter of the PC_DIMM_SIZE_PROP property: reports the size of the memory
+ * region of the linked memory backend.
+ */
+static void pc_dimm_get_size(Object *obj, Visitor *v, void *opaque,
+                             const char *name, Error **errp)
+{
+    int64_t value;
+    MemoryRegion *mr;
+    PCDIMMDevice *dimm = PC_DIMM(obj);
+    Error *local_err = NULL;
+
+    /* the link may still be unset, see the check in pc_dimm_realize() */
+    if (!dimm->hostmem) {
+        error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
+        return;
+    }
+
+    mr = host_memory_backend_get_memory(dimm->hostmem, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    if (!mr) {
+        /* NOTE(review): assumes NULL means "backend has no memory" */
+        error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' backend has no memory");
+        return;
+    }
+    value = memory_region_size(mr);
+
+    visit_type_int(v, &value, name, errp);
+}
+
+/*
+ * Link check for PC_DIMM_MEMDEV_PROP: refuse a backend whose memory region
+ * is already mapped, otherwise defer to
+ * qdev_prop_allow_set_link_before_realize().
+ */
+static void pc_dimm_check_memdev_is_busy(Object *obj, const char *name,
+                                         Object *val, Error **errp)
+{
+    MemoryRegion *mr;
+
+    mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), errp);
+    if (memory_region_is_mapped(mr)) {
+        char *path = object_get_canonical_path_component(val);
+        error_setg(errp, "can't use already busy memdev: %s", path);
+        g_free(path);
+    } else {
+        qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
+    }
+}
+
+/* Instance init: add the PC_DIMM_SIZE_PROP getter and the memdev link. */
+static void pc_dimm_init(Object *obj)
+{
+    PCDIMMDevice *dimm = PC_DIMM(obj);
+
+    object_property_add(obj, PC_DIMM_SIZE_PROP, "int", pc_dimm_get_size,
+                        NULL, NULL, NULL, &error_abort);
+    object_property_add_link(obj, PC_DIMM_MEMDEV_PROP, TYPE_MEMORY_BACKEND,
+                             (Object **)&dimm->hostmem,
+                             pc_dimm_check_memdev_is_busy,
+                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
+                             &error_abort);
+}
+
+/* Realize: fail unless a memory backend has been linked. */
+static void pc_dimm_realize(DeviceState *dev, Error **errp)
+{
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+
+    if (!dimm->hostmem) {
+        error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
+        return;
+    }
+}
+
+/* PCDIMMDeviceClass::get_memory_region hook: the linked backend's region. */
+static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm)
+{
+    return host_memory_backend_get_memory(dimm->hostmem, &error_abort);
+}
+
+static void pc_dimm_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc);
+
+    dc->realize = pc_dimm_realize;
+    dc->props = pc_dimm_properties;
+
+    ddc->get_memory_region = pc_dimm_get_memory_region;
+}
+
+static const TypeInfo pc_dimm_info = {
+    .name = TYPE_PC_DIMM,
+    .parent = TYPE_DEVICE,
+    .instance_size = sizeof(PCDIMMDevice),
+    .instance_init = pc_dimm_init,
+    .class_init = pc_dimm_class_init,
+    .class_size = sizeof(PCDIMMDeviceClass),
+};
+
+static void pc_dimm_register_types(void)
+{
+    type_register_static(&pc_dimm_info);
+}
+
+type_init(pc_dimm_register_types)
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index f4a7d47129..3c04342236 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -1104,7 +1104,7 @@ void mips_malta_init(MachineState *machine)
pci_piix4_ide_init(pci_bus, hd, piix4_devfn + 1);
pci_create_simple(pci_bus, piix4_devfn + 2, "piix4-usb-uhci");
smbus = piix4_pm_init(pci_bus, piix4_devfn + 3, 0x1100,
- isa_get_irq(NULL, 9), NULL, 0, NULL);
+ isa_get_irq(NULL, 9), NULL, 0, NULL, NULL);
smbus_eeprom_init(smbus, 8, smbus_eeprom_buf, smbus_eeprom_size);
g_free(smbus_eeprom_buf);
pit = pit_init(isa_bus, 0x40, 0, NULL);
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index a1de2f43a0..7ac7c21bdb 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -15,6 +15,7 @@
#include "net/net.h"
#include "net/tap.h"
+#include "net/vhost-user.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
@@ -27,7 +28,6 @@
#include <sys/socket.h>
#include <linux/kvm.h>
#include <fcntl.h>
-#include <sys/ioctl.h>
#include <linux/virtio_ring.h>
#include <netpacket/packet.h>
#include <net/ethernet.h>
@@ -46,39 +46,76 @@ struct vhost_net {
NetClientState *nc;
};
-unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
+/* Features supported by host kernel.
+ * This list is what vhost_net_get_feature_bits() returns for
+ * NET_CLIENT_OPTIONS_KIND_TAP clients.  VHOST_INVALID_FEATURE_BIT is the
+ * last entry (presumably the terminator expected by vhost_get_features()
+ * and vhost_ack_features() -- confirm against hw/virtio/vhost.c). */
+static const int kernel_feature_bits[] = {
+ VIRTIO_F_NOTIFY_ON_EMPTY,
+ VIRTIO_RING_F_INDIRECT_DESC,
+ VIRTIO_RING_F_EVENT_IDX,
+ VIRTIO_NET_F_MRG_RXBUF,
+ VHOST_INVALID_FEATURE_BIT
+};
+
+/* Features supported by others.
+ * This list is what vhost_net_get_feature_bits() returns for
+ * NET_CLIENT_OPTIONS_KIND_VHOST_USER clients.  VHOST_INVALID_FEATURE_BIT is
+ * the last entry (presumably the list terminator -- confirm against
+ * hw/virtio/vhost.c).
+ * NOTE(review): unlike kernel_feature_bits this array is not static; check
+ * whether any other translation unit really needs the symbol. */
+const int user_feature_bits[] = {
+ VIRTIO_F_NOTIFY_ON_EMPTY,
+ VIRTIO_RING_F_INDIRECT_DESC,
+ VIRTIO_RING_F_EVENT_IDX,
+
+ VIRTIO_F_ANY_LAYOUT,
+ VIRTIO_NET_F_CSUM,
+ VIRTIO_NET_F_GUEST_CSUM,
+ VIRTIO_NET_F_GSO,
+ VIRTIO_NET_F_GUEST_TSO4,
+ VIRTIO_NET_F_GUEST_TSO6,
+ VIRTIO_NET_F_GUEST_ECN,
+ VIRTIO_NET_F_GUEST_UFO,
+ VIRTIO_NET_F_HOST_TSO4,
+ VIRTIO_NET_F_HOST_TSO6,
+ VIRTIO_NET_F_HOST_ECN,
+ VIRTIO_NET_F_HOST_UFO,
+ VIRTIO_NET_F_MRG_RXBUF,
+ VIRTIO_NET_F_STATUS,
+ VIRTIO_NET_F_CTRL_VQ,
+ VIRTIO_NET_F_CTRL_RX,
+ VIRTIO_NET_F_CTRL_VLAN,
+ VIRTIO_NET_F_CTRL_RX_EXTRA,
+ VIRTIO_NET_F_CTRL_MAC_ADDR,
+ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
+
+ VIRTIO_NET_F_MQ,
+
+ VHOST_INVALID_FEATURE_BIT
+};
+
+static const int *vhost_net_get_feature_bits(struct vhost_net *net)
{
- /* Clear features not supported by host kernel. */
- if (!(net->dev.features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY))) {
- features &= ~(1 << VIRTIO_F_NOTIFY_ON_EMPTY);
- }
- if (!(net->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))) {
- features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
- }
- if (!(net->dev.features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
- features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
- }
- if (!(net->dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF))) {
- features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
+ const int *feature_bits = 0;
+
+ switch (net->nc->info->type) {
+ case NET_CLIENT_OPTIONS_KIND_TAP:
+ feature_bits = kernel_feature_bits;
+ break;
+ case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
+ feature_bits = user_feature_bits;
+ break;
+ default:
+ error_report("Feature bits not defined for this type: %d",
+ net->nc->info->type);
+ break;
}
- return features;
+
+ return feature_bits;
+}
+
+unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
+{
+ return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
+ features);
}
void vhost_net_ack_features(struct vhost_net *net, unsigned features)
{
- net->dev.acked_features = net->dev.backend_features;
- if (features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) {
- net->dev.acked_features |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY);
- }
- if (features & (1 << VIRTIO_RING_F_INDIRECT_DESC)) {
- net->dev.acked_features |= (1 << VIRTIO_RING_F_INDIRECT_DESC);
- }
- if (features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
- net->dev.acked_features |= (1 << VIRTIO_RING_F_EVENT_IDX);
- }
- if (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
- net->dev.acked_features |= (1 << VIRTIO_NET_F_MRG_RXBUF);
- }
+ vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
}
static int vhost_net_get_fd(NetClientState *backend)
@@ -92,42 +129,52 @@ static int vhost_net_get_fd(NetClientState *backend)
}
}
-struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
- bool force)
+struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
int r;
+ bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
struct vhost_net *net = g_malloc(sizeof *net);
- if (!backend) {
- fprintf(stderr, "vhost-net requires backend to be setup\n");
+
+ if (!options->net_backend) {
+ fprintf(stderr, "vhost-net requires net backend to be setup\n");
goto fail;
}
- r = vhost_net_get_fd(backend);
- if (r < 0) {
- goto fail;
+
+ if (backend_kernel) {
+ r = vhost_net_get_fd(options->net_backend);
+ if (r < 0) {
+ goto fail;
+ }
+ net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
+ ? 0 : (1 << VHOST_NET_F_VIRTIO_NET_HDR);
+ net->backend = r;
+ } else {
+ net->dev.backend_features = 0;
+ net->backend = -1;
}
- net->nc = backend;
- net->dev.backend_features = qemu_has_vnet_hdr(backend) ? 0 :
- (1 << VHOST_NET_F_VIRTIO_NET_HDR);
- net->backend = r;
+ net->nc = options->net_backend;
net->dev.nvqs = 2;
net->dev.vqs = net->vqs;
- r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force);
+ r = vhost_dev_init(&net->dev, options->opaque,
+ options->backend_type, options->force);
if (r < 0) {
goto fail;
}
- if (!qemu_has_vnet_hdr_len(backend,
+ if (!qemu_has_vnet_hdr_len(options->net_backend,
sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
}
- if (~net->dev.features & net->dev.backend_features) {
- fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n",
- (uint64_t)(~net->dev.features & net->dev.backend_features));
- vhost_dev_cleanup(&net->dev);
- goto fail;
+ if (backend_kernel) {
+ if (~net->dev.features & net->dev.backend_features) {
+ fprintf(stderr, "vhost lacks feature mask %" PRIu64
+ " for backend\n",
+ (uint64_t)(~net->dev.features & net->dev.backend_features));
+ vhost_dev_cleanup(&net->dev);
+ goto fail;
+ }
}
-
/* Set sane init value. Override when guest acks. */
vhost_net_ack_features(net, 0);
return net;
@@ -166,24 +213,37 @@ static int vhost_net_start_one(struct vhost_net *net,
goto fail_start;
}
- net->nc->info->poll(net->nc, false);
- qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
- file.fd = net->backend;
- for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
- r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
- if (r < 0) {
- r = -errno;
- goto fail;
+ if (net->nc->info->poll) {
+ net->nc->info->poll(net->nc, false);
+ }
+
+ if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
+ qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
+ file.fd = net->backend;
+ for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
+ const VhostOps *vhost_ops = net->dev.vhost_ops;
+ r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND,
+ &file);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
}
}
return 0;
fail:
file.fd = -1;
- while (file.index-- > 0) {
- int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
- assert(r >= 0);
+ if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
+ while (file.index-- > 0) {
+ const VhostOps *vhost_ops = net->dev.vhost_ops;
+ int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND,
+ &file);
+ assert(r >= 0);
+ }
+ }
+ if (net->nc->info->poll) {
+ net->nc->info->poll(net->nc, true);
}
- net->nc->info->poll(net->nc, true);
vhost_dev_stop(&net->dev, dev);
fail_start:
vhost_dev_disable_notifiers(&net->dev, dev);
@@ -200,11 +260,17 @@ static void vhost_net_stop_one(struct vhost_net *net,
return;
}
- for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
- int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
- assert(r >= 0);
+ if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
+ for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
+ const VhostOps *vhost_ops = net->dev.vhost_ops;
+ int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND,
+ &file);
+ assert(r >= 0);
+ }
+ }
+ if (net->nc->info->poll) {
+ net->nc->info->poll(net->nc, true);
}
- net->nc->info->poll(net->nc, true);
vhost_dev_stop(&net->dev, dev);
vhost_dev_disable_notifiers(&net->dev, dev);
}
@@ -224,7 +290,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
}
for (i = 0; i < total_queues; i++) {
- r = vhost_net_start_one(tap_get_vhost_net(ncs[i].peer), dev, i * 2);
+ r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev, i * 2);
if (r < 0) {
goto err;
@@ -241,7 +307,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
err:
while (--i >= 0) {
- vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
+ vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
}
return r;
}
@@ -262,7 +328,7 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
assert(r >= 0);
for (i = 0; i < total_queues; i++) {
- vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
+ vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
}
}
@@ -282,9 +348,30 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
{
vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}
+
+/*
+ * Return the vhost_net state behind net client @nc, dispatching on the
+ * client type: TAP clients go through tap_get_vhost_net(), vhost-user
+ * clients through vhost_user_get_vhost_net().
+ *
+ * Returns NULL when @nc is NULL or its type is neither of the two.
+ */
+VHostNetState *get_vhost_net(NetClientState *nc)
+{
+    if (!nc) {
+        return NULL;
+    }
+
+    switch (nc->info->type) {
+    case NET_CLIENT_OPTIONS_KIND_TAP:
+        return tap_get_vhost_net(nc);
+    case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
+        return vhost_user_get_vhost_net(nc);
+    default:
+        return NULL;
+    }
+}
#else
-struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
- bool force)
+struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
error_report("vhost-net support is not compiled in");
return NULL;
@@ -328,4 +415,9 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask)
{
}
+
+/* Stub for builds without vhost-net support: no client has a vhost_net. */
+VHostNetState *get_vhost_net(NetClientState *nc)
+{
+    return NULL;
+}
#endif
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 940a7cfe54..d8588f3808 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -99,20 +99,23 @@ static bool virtio_net_started(VirtIONet *n, uint8_t status)
(n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}
+/*
+ * Callback of the announce timer; @opaque is the VirtIONet.  Uses up one
+ * announce round, sets VIRTIO_NET_S_ANNOUNCE in the device status and then
+ * calls virtio_notify_config() for the device.
+ */
+static void virtio_net_announce_timer(void *opaque)
+{
+    VirtIONet *net = opaque;
+
+    net->status |= VIRTIO_NET_S_ANNOUNCE;
+    net->announce_counter--;
+
+    virtio_notify_config(VIRTIO_DEVICE(net));
+}
+
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
VirtIODevice *vdev = VIRTIO_DEVICE(n);
NetClientState *nc = qemu_get_queue(n->nic);
int queues = n->multiqueue ? n->max_queues : 1;
- if (!nc->peer) {
- return;
- }
- if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
- return;
- }
-
- if (!tap_get_vhost_net(nc->peer)) {
+ if (!get_vhost_net(nc->peer)) {
return;
}
@@ -122,7 +125,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
}
if (!n->vhost_started) {
int r;
- if (!vhost_net_query(tap_get_vhost_net(nc->peer), vdev)) {
+ if (!vhost_net_query(get_vhost_net(nc->peer), vdev)) {
return;
}
n->vhost_started = 1;
@@ -322,6 +325,9 @@ static void virtio_net_reset(VirtIODevice *vdev)
n->nobcast = 0;
/* multiqueue is disabled by default */
n->curr_queues = 1;
+ timer_del(n->announce_timer);
+ n->announce_counter = 0;
+ n->status &= ~VIRTIO_NET_S_ANNOUNCE;
/* Flush any MAC and VLAN filter table state */
n->mac_table.in_use = 0;
@@ -452,13 +458,10 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
}
- if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
+ if (!get_vhost_net(nc->peer)) {
return features;
}
- if (!tap_get_vhost_net(nc->peer)) {
- return features;
- }
- return vhost_net_get_features(tap_get_vhost_net(nc->peer), features);
+ return vhost_net_get_features(get_vhost_net(nc->peer), features);
}
static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
@@ -522,13 +525,10 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
for (i = 0; i < n->max_queues; i++) {
NetClientState *nc = qemu_get_subqueue(n->nic, i);
- if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
- continue;
- }
- if (!tap_get_vhost_net(nc->peer)) {
+ if (!get_vhost_net(nc->peer)) {
continue;
}
- vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
+ vhost_net_ack_features(get_vhost_net(nc->peer), features);
}
if ((1 << VIRTIO_NET_F_CTRL_VLAN) & features) {
@@ -731,6 +731,23 @@ static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
return VIRTIO_NET_OK;
}
+/*
+ * Handle a VIRTIO_NET_CTRL_ANNOUNCE control command.
+ *
+ * Only VIRTIO_NET_CTRL_ANNOUNCE_ACK is accepted, and only while
+ * VIRTIO_NET_S_ANNOUNCE is set in the status.  The ack clears that bit and,
+ * if announce rounds remain, re-arms the announce timer after
+ * self_announce_delay() ms.  @iov and @iov_cnt are not used.
+ *
+ * Returns VIRTIO_NET_OK for an accepted ack, VIRTIO_NET_ERR otherwise.
+ */
+static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
+                                      struct iovec *iov, unsigned int iov_cnt)
+{
+    if (cmd != VIRTIO_NET_CTRL_ANNOUNCE_ACK ||
+        !(n->status & VIRTIO_NET_S_ANNOUNCE)) {
+        return VIRTIO_NET_ERR;
+    }
+
+    n->status &= ~VIRTIO_NET_S_ANNOUNCE;
+
+    if (n->announce_counter) {
+        timer_mod(n->announce_timer,
+                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+                  self_announce_delay(n->announce_counter));
+    }
+
+    return VIRTIO_NET_OK;
+}
+
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
struct iovec *iov, unsigned int iov_cnt)
{
@@ -794,6 +811,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
+ } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
+ status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
} else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
} else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
@@ -1451,6 +1470,12 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
qemu_get_subqueue(n->nic, i)->link_down = link_down;
}
+ if (vdev->guest_features & (0x1 << VIRTIO_NET_F_GUEST_ANNOUNCE) &&
+ vdev->guest_features & (0x1 << VIRTIO_NET_F_CTRL_VQ)) {
+ n->announce_counter = SELF_ANNOUNCE_ROUNDS;
+ timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
+ }
+
return 0;
}
@@ -1476,7 +1501,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
VirtIONet *n = VIRTIO_NET(vdev);
NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
assert(n->vhost_started);
- return vhost_net_virtqueue_pending(tap_get_vhost_net(nc->peer), idx);
+ return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
@@ -1485,7 +1510,7 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
VirtIONet *n = VIRTIO_NET(vdev);
NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
assert(n->vhost_started);
- vhost_net_virtqueue_mask(tap_get_vhost_net(nc->peer),
+ vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
vdev, idx, mask);
}
@@ -1509,18 +1534,9 @@ void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
*/
assert(type != NULL);
- if (n->netclient_name) {
- g_free(n->netclient_name);
- n->netclient_name = NULL;
- }
- if (n->netclient_type) {
- g_free(n->netclient_type);
- n->netclient_type = NULL;
- }
-
- if (name != NULL) {
- n->netclient_name = g_strdup(name);
- }
+ g_free(n->netclient_name);
+ g_free(n->netclient_type);
+ n->netclient_name = g_strdup(name);
n->netclient_type = g_strdup(type);
}
@@ -1562,6 +1578,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
n->status = VIRTIO_NET_S_LINK_UP;
+ n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+ virtio_net_announce_timer, n);
if (n->netclient_type) {
/*
@@ -1616,14 +1634,10 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
unregister_savevm(dev, "virtio-net", n);
- if (n->netclient_name) {
- g_free(n->netclient_name);
- n->netclient_name = NULL;
- }
- if (n->netclient_type) {
- g_free(n->netclient_type);
- n->netclient_type = NULL;
- }
+ g_free(n->netclient_name);
+ n->netclient_name = NULL;
+ g_free(n->netclient_type);
+ n->netclient_type = NULL;
g_free(n->mac_table.macs);
g_free(n->vlans);
@@ -1642,6 +1656,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
}
}
+ timer_del(n->announce_timer);
+ timer_free(n->announce_timer);
g_free(n->vqs);
qemu_del_nic(n->nic);
virtio_cleanup(vdev);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e06321cf15..82f183f173 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -673,8 +673,8 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
int i, off;
/* memory node(s) */
- if (nb_numa_nodes > 1 && node_mem[0] < ram_size) {
- node0_size = node_mem[0];
+ if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
+ node0_size = numa_info[0].node_mem;
} else {
node0_size = ram_size;
}
@@ -712,7 +712,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
if (mem_start >= ram_size) {
node_size = 0;
} else {
- node_size = node_mem[i];
+ node_size = numa_info[i].node_mem;
if (node_size > ram_size - mem_start) {
node_size = ram_size - mem_start;
}
@@ -857,7 +857,8 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
/* Update the RMA size if necessary */
if (spapr->vrma_adjust) {
- hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
+ hwaddr node0_size = (nb_numa_nodes > 1) ?
+ numa_info[0].node_mem : ram_size;
spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift);
}
}
@@ -1289,7 +1290,7 @@ static void ppc_spapr_init(MachineState *machine)
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *ram = g_new(MemoryRegion, 1);
hwaddr rma_alloc_size;
- hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
+ hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size;
uint32_t initrd_base = 0;
long kernel_size = 0, initrd_size = 0;
long load_limit, rtas_limit, fw_size;
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index cc8df4ef03..ddfe76aed0 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -24,15 +24,25 @@
#include "hw/virtio/virtio-scsi.h"
#include "hw/virtio/virtio-bus.h"
+/* Features supported by host kernel.
+ * Passed to vhost_get_features() by vhost_scsi_get_features();
+ * VHOST_INVALID_FEATURE_BIT is the last entry (presumably the list
+ * terminator -- confirm against hw/virtio/vhost.c). */
+static const int kernel_feature_bits[] = {
+ VIRTIO_F_NOTIFY_ON_EMPTY,
+ VIRTIO_RING_F_INDIRECT_DESC,
+ VIRTIO_RING_F_EVENT_IDX,
+ VIRTIO_SCSI_F_HOTPLUG,
+ VHOST_INVALID_FEATURE_BIT
+};
+
static int vhost_scsi_set_endpoint(VHostSCSI *s)
{
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+ const VhostOps *vhost_ops = s->dev.vhost_ops;
struct vhost_scsi_target backend;
int ret;
memset(&backend, 0, sizeof(backend));
pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn);
- ret = ioctl(s->dev.control, VHOST_SCSI_SET_ENDPOINT, &backend);
+ ret = vhost_ops->vhost_call(&s->dev, VHOST_SCSI_SET_ENDPOINT, &backend);
if (ret < 0) {
return -errno;
}
@@ -43,10 +53,11 @@ static void vhost_scsi_clear_endpoint(VHostSCSI *s)
{
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
struct vhost_scsi_target backend;
+ const VhostOps *vhost_ops = s->dev.vhost_ops;
memset(&backend, 0, sizeof(backend));
pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn);
- ioctl(s->dev.control, VHOST_SCSI_CLEAR_ENDPOINT, &backend);
+ vhost_ops->vhost_call(&s->dev, VHOST_SCSI_CLEAR_ENDPOINT, &backend);
}
static int vhost_scsi_start(VHostSCSI *s)
@@ -55,13 +66,15 @@ static int vhost_scsi_start(VHostSCSI *s)
VirtIODevice *vdev = VIRTIO_DEVICE(s);
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+ const VhostOps *vhost_ops = s->dev.vhost_ops;
if (!k->set_guest_notifiers) {
error_report("binding does not support guest notifiers");
return -ENOSYS;
}
- ret = ioctl(s->dev.control, VHOST_SCSI_GET_ABI_VERSION, &abi_version);
+ ret = vhost_ops->vhost_call(&s->dev,
+ VHOST_SCSI_GET_ABI_VERSION, &abi_version);
if (ret < 0) {
return -errno;
}
@@ -141,21 +154,7 @@ static uint32_t vhost_scsi_get_features(VirtIODevice *vdev,
{
VHostSCSI *s = VHOST_SCSI(vdev);
- /* Clear features not supported by host kernel. */
- if (!(s->dev.features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY))) {
- features &= ~(1 << VIRTIO_F_NOTIFY_ON_EMPTY);
- }
- if (!(s->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))) {
- features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
- }
- if (!(s->dev.features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
- features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
- }
- if (!(s->dev.features & (1 << VIRTIO_SCSI_F_HOTPLUG))) {
- features &= ~(1 << VIRTIO_SCSI_F_HOTPLUG);
- }
-
- return features;
+ return vhost_get_features(&s->dev, kernel_feature_bits, features);
}
static void vhost_scsi_set_config(VirtIODevice *vdev,
@@ -219,6 +218,13 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
error_setg(errp, "vhost-scsi: unable to parse vhostfd");
return;
}
+ } else {
+ vhostfd = open("/dev/vhost-scsi", O_RDWR);
+ if (vhostfd < 0) {
+ error_setg(errp, "vhost-scsi: open vhost char device failed: %s",
+ strerror(errno));
+ return;
+ }
}
virtio_scsi_common_realize(dev, &err, vhost_dummy_handle_output,
@@ -233,7 +239,8 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
s->dev.vq_index = 0;
- ret = vhost_dev_init(&s->dev, vhostfd, "/dev/vhost-scsi", true);
+ ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)vhostfd,
+ VHOST_BACKEND_TYPE_KERNEL, true);
if (ret < 0) {
error_setg(errp, "vhost-scsi: vhost initialization failed: %s",
strerror(-ret));
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index 1ba53d9cc3..ec9e855bc1 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -5,4 +5,4 @@ common-obj-y += virtio-mmio.o
common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/
obj-y += virtio.o virtio-balloon.o
-obj-$(CONFIG_LINUX) += vhost.o
+obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
new file mode 100644
index 0000000000..35316c40d9
--- /dev/null
+++ b/hw/virtio/vhost-backend.c
@@ -0,0 +1,71 @@
+/*
+ * vhost-backend
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "qemu/error-report.h"
+
+#include <sys/ioctl.h>
+
+extern const VhostOps user_ops;
+
+/*
+ * vhost_call hook of the kernel backend: issues @request with @arg as an
+ * ioctl() on the file descriptor stored (as an integer) in dev->opaque and
+ * returns the ioctl() result unchanged.
+ */
+static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request,
+                             void *arg)
+{
+    int vhost_fd;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL);
+
+    vhost_fd = (uintptr_t) dev->opaque;
+    return ioctl(vhost_fd, request, arg);
+}
+
+/*
+ * vhost_backend_init hook of the kernel backend: stores @opaque in
+ * dev->opaque, where vhost_kernel_call() and vhost_kernel_cleanup() read it
+ * back as a file descriptor.  Always returns 0.
+ */
+static int vhost_kernel_init(struct vhost_dev *dev, void *opaque)
+{
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL);
+    dev->opaque = opaque;
+    return 0;
+}
+
+/*
+ * vhost_backend_cleanup hook of the kernel backend: closes the file
+ * descriptor kept in dev->opaque and returns the close() result.
+ */
+static int vhost_kernel_cleanup(struct vhost_dev *dev)
+{
+    int vhost_fd;
+
+    vhost_fd = (uintptr_t) dev->opaque;
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL);
+
+    return close(vhost_fd);
+}
+
+static const VhostOps kernel_ops = { /* ioctl()-based ops, see above */
+ .backend_type = VHOST_BACKEND_TYPE_KERNEL,
+ .vhost_call = vhost_kernel_call,
+ .vhost_backend_init = vhost_kernel_init,
+ .vhost_backend_cleanup = vhost_kernel_cleanup
+};
+
+/*
+ * Select the VhostOps table of @dev according to @backend_type.
+ *
+ * Returns 0 on success.  For an unknown backend type an error is reported,
+ * dev->vhost_ops is left untouched and -1 is returned.
+ */
+int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
+{
+    int r = 0;
+
+    switch (backend_type) {
+    case VHOST_BACKEND_TYPE_KERNEL:
+        dev->vhost_ops = &kernel_ops;
+        break;
+    case VHOST_BACKEND_TYPE_USER:
+        dev->vhost_ops = &user_ops;
+        break;
+    default:
+        /* error_report() ends the line itself, so no trailing newline */
+        error_report("Unknown vhost backend type");
+        r = -1;
+        break;
+    }
+
+    return r;
+}
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
new file mode 100644
index 0000000000..0df6a936a0
--- /dev/null
+++ b/hw/virtio/vhost-user.c
@@ -0,0 +1,342 @@
+/*
+ * vhost-user
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "sysemu/char.h"
+#include "sysemu/kvm.h"
+#include "qemu/error-report.h"
+#include "qemu/sockets.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <linux/vhost.h>
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+typedef enum VhostUserRequest { /* values stored in VhostUserMsg.request */
+ VHOST_USER_NONE = 0, /* also returned for ioctls with no translation */
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_MAX /* entry count of ioctl_to_vhost_user_request[] */
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion { /* one entry of VhostUserMemory */
+ uint64_t guest_phys_addr; /* vhost_user_call() stores RAMBlock.offset */
+ uint64_t memory_size; /* vhost_user_call() stores RAMBlock.length */
+ uint64_t userspace_addr; /* vhost_user_call() stores RAMBlock.host */
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory { /* payload built for VHOST_SET_MEM_TABLE */
+ uint32_t nregions; /* number of entries of regions[] that are filled in */
+ uint32_t padding; /* counted in the payload size that is sent */
+ VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg { /* header (request, flags, size) + payload */
+ VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK (0x3)
+#define VHOST_USER_REPLY_MASK (0x1<<2)
+ uint32_t flags; /* sent as VHOST_USER_VERSION; replies add REPLY_MASK */
+ uint32_t size; /* the following payload size */
+ union {
+#define VHOST_USER_VRING_IDX_MASK (0xff)
+#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
+ uint64_t u64; /* features, log base, or vring index | NOFD flag */
+ struct vhost_vring_state state; /* SET_VRING_NUM/BASE, GET_VRING_BASE */
+ struct vhost_vring_addr addr; /* SET_VRING_ADDR */
+ VhostUserMemory memory; /* SET_MEM_TABLE */
+ };
+} QEMU_PACKED VhostUserMsg;
+
+static VhostUserMsg m __attribute__ ((unused)); /* sizeof() operand only */
+#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
+ + sizeof(m.flags) \
+ + sizeof(m.size))
+
+#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION (0x1)
+
+/* True only when both kvm_enabled() and kvm_eventfds_enabled() are true. */
+static bool ioeventfd_enabled(void)
+{
+    if (!kvm_enabled()) {
+        return false;
+    }
+
+    return kvm_eventfds_enabled();
+}
+
+/*
+ * Kernel vhost ioctl number for every VhostUserRequest, indexed by the
+ * request value.  Slot 0 (VHOST_USER_NONE) holds an all-ones sentinel.
+ * The table is never written, so it is const.
+ */
+static const unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
+    -1UL,                   /* VHOST_USER_NONE */
+    VHOST_GET_FEATURES,     /* VHOST_USER_GET_FEATURES */
+    VHOST_SET_FEATURES,     /* VHOST_USER_SET_FEATURES */
+    VHOST_SET_OWNER,        /* VHOST_USER_SET_OWNER */
+    VHOST_RESET_OWNER,      /* VHOST_USER_RESET_OWNER */
+    VHOST_SET_MEM_TABLE,    /* VHOST_USER_SET_MEM_TABLE */
+    VHOST_SET_LOG_BASE,     /* VHOST_USER_SET_LOG_BASE */
+    VHOST_SET_LOG_FD,       /* VHOST_USER_SET_LOG_FD */
+    VHOST_SET_VRING_NUM,    /* VHOST_USER_SET_VRING_NUM */
+    VHOST_SET_VRING_ADDR,   /* VHOST_USER_SET_VRING_ADDR */
+    VHOST_SET_VRING_BASE,   /* VHOST_USER_SET_VRING_BASE */
+    VHOST_GET_VRING_BASE,   /* VHOST_USER_GET_VRING_BASE */
+    VHOST_SET_VRING_KICK,   /* VHOST_USER_SET_VRING_KICK */
+    VHOST_SET_VRING_CALL,   /* VHOST_USER_SET_VRING_CALL */
+    VHOST_SET_VRING_ERR     /* VHOST_USER_SET_VRING_ERR */
+};
+
+/*
+ * Map a vhost ioctl number onto the matching VhostUserRequest by a linear
+ * search of ioctl_to_vhost_user_request[].  Returns VHOST_USER_NONE when
+ * @request is not in the table.
+ */
+static VhostUserRequest vhost_user_request_translate(unsigned long int request)
+{
+    VhostUserRequest req;
+
+    for (req = 0; req < VHOST_USER_MAX; req++) {
+        if (ioctl_to_vhost_user_request[req] == request) {
+            return req;
+        }
+    }
+
+    return VHOST_USER_NONE;
+}
+
+/*
+ * Read one reply into @msg from the character device kept in dev->opaque.
+ *
+ * The fixed header (request, flags, size) is read and validated first:
+ * flags must equal VHOST_USER_REPLY_MASK | VHOST_USER_VERSION and size must
+ * not exceed VHOST_USER_PAYLOAD_SIZE.  Only then are msg->size payload bytes
+ * read, so an oversized length cannot overrun *msg.
+ *
+ * Returns 0 on success, -1 after reporting a short read or a bad header.
+ */
+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
+{
+    CharDriverState *chr = dev->opaque;
+    uint8_t *p = (uint8_t *) msg;
+    int r, size = VHOST_USER_HDR_SIZE;
+
+    r = qemu_chr_fe_read_all(chr, p, size);
+    if (r != size) {
+        error_report("Failed to read msg header. Read %d instead of %d.", r,
+                     size);
+        return -1;
+    }
+
+    /* validate received flags */
+    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
+        error_report("Failed to read msg header."
+                     " Flags 0x%x instead of 0x%x.", msg->flags,
+                     VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
+        return -1;
+    }
+
+    /* validate message size is sane; msg->size is unsigned, hence %u */
+    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
+        error_report("Failed to read msg header."
+                     " Size %u exceeds the maximum %zu.", msg->size,
+                     VHOST_USER_PAYLOAD_SIZE);
+        return -1;
+    }
+
+    if (msg->size) {
+        p += VHOST_USER_HDR_SIZE;
+        size = msg->size;
+        r = qemu_chr_fe_read_all(chr, p, size);
+        if (r != size) {
+            error_report("Failed to read msg payload."
+                         " Read %d instead of %u.", r, msg->size);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Send the header and msg->size payload bytes of @msg over the character
+ * device kept in dev->opaque.  When @fd_num is non-zero the @fds array is
+ * first handed to qemu_chr_fe_set_msgfds().
+ *
+ * Returns 0 when the whole message was written, -1 otherwise.
+ */
+static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
+                            int *fds, int fd_num)
+{
+    CharDriverState *chr = dev->opaque;
+    int total = VHOST_USER_HDR_SIZE + msg->size;
+    int written;
+
+    if (fd_num) {
+        qemu_chr_fe_set_msgfds(chr, fds, fd_num);
+    }
+
+    written = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, total);
+    if (written != total) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * vhost_call hook of the vhost-user backend: translate the vhost ioctl
+ * @request (with its argument @arg) into a VhostUserMsg, send it and, for
+ * VHOST_GET_FEATURES and VHOST_GET_VRING_BASE, read the reply back into
+ * @arg.
+ *
+ * Returns -1 for an unhandled ioctl, when no RAM block with a file
+ * descriptor exists or more than VHOST_MEMORY_MAX_NREGIONS do for
+ * VHOST_SET_MEM_TABLE, and for a reply of the wrong type or size.
+ * Returns 0 otherwise.
+ *
+ * NOTE(review): a failed vhost_user_write()/vhost_user_read() also returns
+ * 0, so for the two GET requests @arg may be left unmodified.  This is kept
+ * as-is because callers may rely on it; confirm before tightening.
+ */
+static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
+                           void *arg)
+{
+    VhostUserMsg msg;
+    VhostUserRequest msg_request;
+    RAMBlock *block = NULL;
+    struct vhost_vring_file *file = NULL;
+    int need_reply = 0;
+    int fds[VHOST_MEMORY_MAX_NREGIONS];
+    size_t fd_num = 0;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+
+    msg_request = vhost_user_request_translate(request);
+    msg.request = msg_request;
+    msg.flags = VHOST_USER_VERSION;
+    msg.size = 0;
+
+    switch (request) {
+    case VHOST_GET_FEATURES:
+        need_reply = 1;
+        break;
+
+    case VHOST_SET_FEATURES:
+    case VHOST_SET_LOG_BASE:
+        msg.u64 = *((__u64 *) arg);
+        msg.size = sizeof(m.u64);
+        break;
+
+    case VHOST_SET_OWNER:
+    case VHOST_RESET_OWNER:
+        break;
+
+    case VHOST_SET_MEM_TABLE:
+        QTAILQ_FOREACH(block, &ram_list.blocks, next)
+        {
+            if (block->fd > 0) {
+                /* regions[] and fds[] only have room for this many */
+                if (fd_num == VHOST_MEMORY_MAX_NREGIONS) {
+                    error_report("vhost-user memory map needs more than"
+                                 " %d regions", VHOST_MEMORY_MAX_NREGIONS);
+                    return -1;
+                }
+                msg.memory.regions[fd_num].userspace_addr =
+                    (uintptr_t) block->host;
+                msg.memory.regions[fd_num].memory_size = block->length;
+                msg.memory.regions[fd_num].guest_phys_addr = block->offset;
+                fds[fd_num++] = block->fd;
+            }
+        }
+
+        msg.memory.nregions = fd_num;
+        /* padding is part of what gets sent: do not leak stack contents */
+        msg.memory.padding = 0;
+
+        if (!fd_num) {
+            error_report("Failed initializing vhost-user memory map\n"
+                         "consider using -object memory-backend-file share=on");
+            return -1;
+        }
+
+        msg.size = sizeof(m.memory.nregions);
+        msg.size += sizeof(m.memory.padding);
+        msg.size += fd_num * sizeof(VhostUserMemoryRegion);
+
+        break;
+
+    case VHOST_SET_LOG_FD:
+        fds[fd_num++] = *((int *) arg);
+        break;
+
+    case VHOST_SET_VRING_NUM:
+    case VHOST_SET_VRING_BASE:
+        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
+        msg.size = sizeof(m.state);
+        break;
+
+    case VHOST_GET_VRING_BASE:
+        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
+        msg.size = sizeof(m.state);
+        need_reply = 1;
+        break;
+
+    case VHOST_SET_VRING_ADDR:
+        memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
+        msg.size = sizeof(m.addr);
+        break;
+
+    case VHOST_SET_VRING_KICK:
+    case VHOST_SET_VRING_CALL:
+    case VHOST_SET_VRING_ERR:
+        file = arg;
+        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
+        msg.size = sizeof(m.u64);
+        /* without a usable eventfd, flag that no fd accompanies the message */
+        if (ioeventfd_enabled() && file->fd > 0) {
+            fds[fd_num++] = file->fd;
+        } else {
+            msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
+        }
+        break;
+
+    default:
+        error_report("vhost-user trying to send unhandled ioctl");
+        return -1;
+    }
+
+    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
+        return 0;
+    }
+
+    if (need_reply) {
+        if (vhost_user_read(dev, &msg) < 0) {
+            return 0;
+        }
+
+        if (msg_request != msg.request) {
+            error_report("Received unexpected msg type."
+                         " Expected %d received %d", msg_request, msg.request);
+            return -1;
+        }
+
+        switch (msg_request) {
+        case VHOST_USER_GET_FEATURES:
+            if (msg.size != sizeof(m.u64)) {
+                error_report("Received bad msg size.");
+                return -1;
+            }
+            *((__u64 *) arg) = msg.u64;
+            break;
+        case VHOST_USER_GET_VRING_BASE:
+            if (msg.size != sizeof(m.state)) {
+                error_report("Received bad msg size.");
+                return -1;
+            }
+            memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
+            break;
+        default:
+            error_report("Received unexpected msg type.");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * vhost_backend_init hook of the vhost-user backend: stores @opaque in
+ * dev->opaque, where vhost_user_read()/vhost_user_write() read it back as a
+ * CharDriverState.  Always returns 0.
+ */
+static int vhost_user_init(struct vhost_dev *dev, void *opaque)
+{
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+    dev->opaque = opaque;
+    return 0;
+}
+
+/*
+ * vhost_backend_cleanup hook of the vhost-user backend: clears dev->opaque
+ * and does nothing else.  Always returns 0.
+ */
+static int vhost_user_cleanup(struct vhost_dev *dev)
+{
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+    dev->opaque = NULL;
+    return 0;
+}
+
+const VhostOps user_ops = { /* non-static: vhost-backend.c declares it extern */
+ .backend_type = VHOST_BACKEND_TYPE_USER,
+ .vhost_call = vhost_user_call,
+ .vhost_backend_init = vhost_user_init,
+ .vhost_backend_cleanup = vhost_user_cleanup
+ };
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index f62cfaf38e..c1b1aad6cf 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -13,7 +13,6 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
-#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/hw.h"
#include "qemu/atomic.h"
@@ -289,15 +288,13 @@ static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
log = g_malloc0(size * sizeof *log);
log_base = (uint64_t)(unsigned long)log;
- r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base);
assert(r >= 0);
/* Sync only the range covered by the old log */
if (dev->log_size) {
vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
}
- if (dev->log) {
- g_free(dev->log);
- }
+ g_free(dev->log);
dev->log = log;
dev->log_size = size;
}
@@ -458,7 +455,7 @@ static void vhost_commit(MemoryListener *listener)
}
if (!dev->log_enabled) {
- r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
assert(r >= 0);
dev->memory_changed = false;
return;
@@ -471,7 +468,7 @@ static void vhost_commit(MemoryListener *listener)
if (dev->log_size < log_size) {
vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
}
- r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
assert(r >= 0);
/* To log less, can only decrease log size after table update. */
if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
@@ -539,7 +536,7 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
.log_guest_addr = vq->used_phys,
.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
};
- int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+ int r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ADDR, &addr);
if (r < 0) {
return -errno;
}
@@ -553,7 +550,7 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
if (enable_log) {
features |= 0x1 << VHOST_F_LOG_ALL;
}
- r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_FEATURES, &features);
return r < 0 ? -errno : 0;
}
@@ -601,9 +598,7 @@ static int vhost_migration_log(MemoryListener *listener, int enable)
if (r < 0) {
return r;
}
- if (dev->log) {
- g_free(dev->log);
- }
+ g_free(dev->log);
dev->log = NULL;
dev->log_size = 0;
} else {
@@ -668,13 +663,13 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
vq->num = state.num = virtio_queue_get_num(vdev, idx);
- r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_NUM, &state);
if (r) {
return -errno;
}
state.num = virtio_queue_get_last_avail_idx(vdev, idx);
- r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_BASE, &state);
if (r) {
return -errno;
}
@@ -716,7 +711,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
}
file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
- r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_KICK, &file);
if (r) {
r = -errno;
goto fail_kick;
@@ -754,7 +749,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
};
int r;
assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
- r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_GET_VRING_BASE, &state);
if (r < 0) {
fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
fflush(stderr);
@@ -796,7 +791,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
}
file.fd = event_notifier_get_fd(&vq->masked_notifier);
- r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_CALL, &file);
if (r) {
r = -errno;
goto fail_call;
@@ -812,25 +807,26 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
event_notifier_cleanup(&vq->masked_notifier);
}
-int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
- bool force)
+int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
+ VhostBackendType backend_type, bool force)
{
uint64_t features;
int i, r;
- if (devfd >= 0) {
- hdev->control = devfd;
- } else {
- hdev->control = open(devpath, O_RDWR);
- if (hdev->control < 0) {
- return -errno;
- }
+
+ if (vhost_set_backend_type(hdev, backend_type) < 0) {
+ return -1;
}
- r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
+
+ if (hdev->vhost_ops->vhost_backend_init(hdev, opaque) < 0) {
+ return -errno;
+ }
+
+ r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL);
if (r < 0) {
goto fail;
}
- r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
+ r = hdev->vhost_ops->vhost_call(hdev, VHOST_GET_FEATURES, &features);
if (r < 0) {
goto fail;
}
@@ -875,7 +871,7 @@ fail_vq:
}
fail:
r = -errno;
- close(hdev->control);
+ hdev->vhost_ops->vhost_backend_cleanup(hdev);
return r;
}
@@ -888,7 +884,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
memory_listener_unregister(&hdev->memory_listener);
g_free(hdev->mem);
g_free(hdev->mem_sections);
- close(hdev->control);
+ hdev->vhost_ops->vhost_backend_cleanup(hdev);
}
bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
@@ -990,10 +986,37 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
} else {
file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
}
- r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file);
+ r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_VRING_CALL, &file);
assert(r >= 0);
}
+unsigned vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
+ unsigned features)
+{ /* Returns @features with each listed bit that hdev->features lacks cleared. */
+ const int *bit = feature_bits; /* list ends at VHOST_INVALID_FEATURE_BIT */
+ while (*bit != VHOST_INVALID_FEATURE_BIT) {
+ unsigned bit_mask = (1U << *bit); /* 1U: shifting a signed 1 by 31 is undefined */
+ if (!(hdev->features & bit_mask)) {
+ features &= ~bit_mask; /* not offered by the backend: drop it */
+ }
+ bit++;
+ }
+ return features;
+}
+/* Sets in hdev->acked_features each listed bit that is also set in @features. */
+void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
+ unsigned features)
+{
+ const int *bit = feature_bits; /* list ends at VHOST_INVALID_FEATURE_BIT */
+ while (*bit != VHOST_INVALID_FEATURE_BIT) {
+ unsigned bit_mask = (1U << *bit); /* 1U: shifting a signed 1 by 31 is undefined */
+ if (features & bit_mask) {
+ hdev->acked_features |= bit_mask;
+ }
+ bit++;
+ }
+}
+
/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
@@ -1005,7 +1028,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
if (r < 0) {
goto fail_features;
}
- r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem);
+ r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
if (r < 0) {
r = -errno;
goto fail_mem;
@@ -1024,8 +1047,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
hdev->log_size = vhost_get_log_size(hdev);
hdev->log = hdev->log_size ?
g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
- r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
- (uint64_t)(unsigned long)hdev->log);
+ r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, hdev->log);
if (r < 0) {
r = -errno;
goto fail_log;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index a07ae8ad91..a3082d569d 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1164,14 +1164,8 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
- if (vdev->bus_name) {
- g_free(vdev->bus_name);
- vdev->bus_name = NULL;
- }
-
- if (bus_name) {
- vdev->bus_name = g_strdup(bus_name);
- }
+ g_free(vdev->bus_name);
+ vdev->bus_name = g_strdup(bus_name);
}
static void virtio_device_realize(DeviceState *dev, Error **errp)
@@ -1206,10 +1200,8 @@ static void virtio_device_unrealize(DeviceState *dev, Error **errp)
}
}
- if (vdev->bus_name) {
- g_free(vdev->bus_name);
- vdev->bus_name = NULL;
- }
+ g_free(vdev->bus_name);
+ vdev->bus_name = NULL;
}
static void virtio_device_class_init(ObjectClass *klass, void *data)
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index e8363d7248..f91581fc65 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -297,11 +297,6 @@ CPUArchState *cpu_copy(CPUArchState *env);
/* memory API */
-extern ram_addr_t ram_size;
-
-/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
-#define RAM_PREALLOC_MASK (1 << 0)
-
typedef struct RAMBlock {
struct MemoryRegion *mr;
uint8_t *host;
@@ -327,9 +322,6 @@ typedef struct RAMList {
} RAMList;
extern RAMList ram_list;
-extern const char *mem_path;
-extern int mem_prealloc;
-
/* Flags stored in the low bits of the TLB virtual address. These are
defined so that fast path ram access is all zeros. */
/* Zero if TLB entry is valid. */
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 89ec6404cf..e3ec4c8e0c 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -45,6 +45,8 @@ typedef uintptr_t ram_addr_t;
# define RAM_ADDR_FMT "%" PRIxPTR
#endif
+extern ram_addr_t ram_size;
+
/* memory API */
typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 549ae734e6..3d778d70f0 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -31,6 +31,7 @@
#include "qemu/queue.h"
#include "qemu/int128.h"
#include "qemu/notify.h"
+#include "qapi/error.h"
#define MAX_PHYS_ADDR_SPACE_BITS 62
#define MAX_PHYS_ADDR (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1)
@@ -311,6 +312,28 @@ void memory_region_init_ram(MemoryRegion *mr,
const char *name,
uint64_t size);
+#ifdef __linux__
+/**
+ * memory_region_init_ram_from_file: Initialize RAM memory region with a
+ * mmap-ed backend.
+ *
+ * @mr: the #MemoryRegion to be initialized.
+ * @owner: the object that tracks the region's reference count
+ * @name: the name of the region.
+ * @size: size of the region.
+ * @share: %true if memory must be mmaped with the MAP_SHARED flag
+ * @path: the path in which to allocate the RAM.
+ * @errp: pointer to Error*, to store an error if it happens.
+ */
+void memory_region_init_ram_from_file(MemoryRegion *mr,
+ struct Object *owner,
+ const char *name,
+ uint64_t size,
+ bool share,
+ const char *path,
+ Error **errp);
+#endif
+
/**
* memory_region_init_ram_ptr: Initialize RAM memory region from a
* user-provided pointer. Accesses into the
@@ -513,6 +536,16 @@ bool memory_region_is_logging(MemoryRegion *mr);
bool memory_region_is_rom(MemoryRegion *mr);
/**
+ * memory_region_get_fd: Get a file descriptor backing a RAM memory region.
+ *
+ * Returns a file descriptor backing a file-based RAM memory region,
+ * or -1 if the region is not a file-based RAM memory region.
+ *
+ * @mr: the RAM or alias memory region being queried.
+ */
+int memory_region_get_fd(MemoryRegion *mr);
+
+/**
* memory_region_get_ram_ptr: Get a pointer into a RAM memory region.
*
* Returns a host pointer to a RAM memory region (created with
@@ -848,6 +881,14 @@ void memory_region_set_alias_offset(MemoryRegion *mr,
bool memory_region_present(MemoryRegion *container, hwaddr addr);
/**
+ * memory_region_is_mapped: returns true if #MemoryRegion is mapped
+ * into any address space.
+ *
+ * @mr: a #MemoryRegion which should be checked if it's mapped
+ */
+bool memory_region_is_mapped(MemoryRegion *mr);
+
+/**
* memory_region_find: translate an address/size relative to a
* MemoryRegion into a #MemoryRegionSection.
*
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index b94de02ea7..55ca67681f 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -22,9 +22,13 @@
#ifndef CONFIG_USER_ONLY
#include "hw/xen/xen.h"
+ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
+ bool share, const char *mem_path,
+ Error **errp);
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
MemoryRegion *mr);
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr);
+int qemu_get_ram_fd(ram_addr_t addr);
void *qemu_get_ram_ptr(ram_addr_t addr);
void qemu_ram_free(ram_addr_t addr);
void qemu_ram_free_from_ptr(ram_addr_t addr);
diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h
index a9fae9d5c5..1f678b4bf2 100644
--- a/include/hw/acpi/acpi.h
+++ b/include/hw/acpi/acpi.h
@@ -26,6 +26,11 @@
#include "exec/memory.h"
#include "hw/irq.h"
+/*
+ * current device naming scheme supports up to 256 memory devices
+ */
+#define ACPI_MAX_RAM_SLOTS 256
+
/* from linux include/acpi/actype.h */
/* Default ACPI register widths */
diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h
new file mode 100644
index 0000000000..f245f8d236
--- /dev/null
+++ b/include/hw/acpi/acpi_dev_interface.h
@@ -0,0 +1,43 @@
+#ifndef ACPI_DEV_INTERFACE_H
+#define ACPI_DEV_INTERFACE_H
+
+#include "qom/object.h"
+#include "qapi-types.h"
+
+#define TYPE_ACPI_DEVICE_IF "acpi-device-interface"
+
+#define ACPI_DEVICE_IF_CLASS(klass) \
+ OBJECT_CLASS_CHECK(AcpiDeviceIfClass, (klass), \
+ TYPE_ACPI_DEVICE_IF)
+#define ACPI_DEVICE_IF_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(AcpiDeviceIfClass, (obj), \
+ TYPE_ACPI_DEVICE_IF)
+#define ACPI_DEVICE_IF(obj) \
+ INTERFACE_CHECK(AcpiDeviceIf, (obj), \
+ TYPE_ACPI_DEVICE_IF)
+
+
+typedef struct AcpiDeviceIf {
+ /* <private> */
+ Object Parent;
+} AcpiDeviceIf;
+
+/**
+ * AcpiDeviceIfClass:
+ *
+ * @ospm_status: reports status of ACPI device objects, as reported via the
+ * _OST method if the device supports it. The callback returns void, so the
+ * result is presumably handed back through @list (confirm in implementers).
+ *
+ * Provides a unified interface to generic ACPI functionality that can be
+ * used without knowledge of the internals of the actual device
+ * that implements the ACPI interface.
+ */
+typedef struct AcpiDeviceIfClass {
+ /* <private> */
+ InterfaceClass parent_class;
+
+ /* <public> */
+ void (*ospm_status)(AcpiDeviceIf *adev, ACPIOSTInfoList ***list);
+} AcpiDeviceIfClass;
+#endif
diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h
index 4576400fd7..9e5d30c9df 100644
--- a/include/hw/acpi/cpu_hotplug.h
+++ b/include/hw/acpi/cpu_hotplug.h
@@ -13,7 +13,7 @@
#define ACPI_HOTPLUG_H
#include "hw/acpi/acpi.h"
-#include "hw/acpi/cpu_hotplug_defs.h"
+#include "hw/acpi/pc-hotplug.h"
typedef struct AcpiCpuHotplug {
MemoryRegion io;
diff --git a/include/hw/acpi/cpu_hotplug_defs.h b/include/hw/acpi/cpu_hotplug_defs.h
deleted file mode 100644
index 9f33663511..0000000000
--- a/include/hw/acpi/cpu_hotplug_defs.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * QEMU ACPI hotplug utilities shared defines
- *
- * Copyright (C) 2013 Red Hat Inc
- *
- * Authors:
- * Igor Mammedov <imammedo@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-#ifndef ACPI_HOTPLUG_DEFS_H
-#define ACPI_HOTPLUG_DEFS_H
-
-/*
- * ONLY DEFINEs are permited in this file since it's shared
- * between C and ASL code.
- */
-#define ACPI_CPU_HOTPLUG_STATUS 4
-
-/* Limit for CPU arch IDs for CPU hotplug. All hotpluggable CPUs should
- * have CPUClass.get_arch_id() < ACPI_CPU_HOTPLUG_ID_LIMIT.
- */
-#define ACPI_CPU_HOTPLUG_ID_LIMIT 256
-
-/* 256 CPU IDs, 8 bits per entry: */
-#define ACPI_GPE_PROC_LEN 32
-
-#define ICH9_CPU_HOTPLUG_IO_BASE 0x0CD8
-#define PIIX4_CPU_HOTPLUG_IO_BASE 0xaf00
-
-#endif
diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h
index 104f419852..7e42448ef9 100644
--- a/include/hw/acpi/ich9.h
+++ b/include/hw/acpi/ich9.h
@@ -23,6 +23,8 @@
#include "hw/acpi/acpi.h"
#include "hw/acpi/cpu_hotplug.h"
+#include "hw/acpi/memory_hotplug.h"
+#include "hw/acpi/acpi_dev_interface.h"
typedef struct ICH9LPCPMRegs {
/*
@@ -46,6 +48,8 @@ typedef struct ICH9LPCPMRegs {
AcpiCpuHotplug gpe_cpu;
Notifier cpu_added_notifier;
+
+ MemHotplugState acpi_memory_hotplug;
} ICH9LPCPMRegs;
void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
@@ -55,4 +59,7 @@ extern const VMStateDescription vmstate_ich9_pm;
void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp);
+void ich9_pm_device_plug_cb(ICH9LPCPMRegs *pm, DeviceState *dev, Error **errp);
+
+void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list);
#endif /* HW_ACPI_ICH9_H */
diff --git a/include/hw/acpi/memory_hotplug.h b/include/hw/acpi/memory_hotplug.h
new file mode 100644
index 0000000000..7bbf8a0064
--- /dev/null
+++ b/include/hw/acpi/memory_hotplug.h
@@ -0,0 +1,38 @@
+#ifndef QEMU_HW_ACPI_MEMORY_HOTPLUG_H
+#define QEMU_HW_ACPI_MEMORY_HOTPLUG_H
+
+#include "hw/qdev-core.h"
+#include "hw/acpi/acpi.h"
+#include "migration/vmstate.h"
+
+#define ACPI_MEMORY_HOTPLUG_STATUS 8
+
+typedef struct MemStatus { /* Element type of the MemHotplugState.devs array. */
+ DeviceState *dimm; /* NOTE(review): presumably the device tracked by this entry - confirm */
+ bool is_enabled;
+ bool is_inserting;
+ uint32_t ost_event; /* NOTE(review): looks like an _OST event code - confirm */
+ uint32_t ost_status; /* NOTE(review): looks like an _OST status code - confirm */
+} MemStatus;
+
+typedef struct MemHotplugState { /* State passed to acpi_memory_hotplug_init() and acpi_memory_plug_cb(). */
+ bool is_enabled; /* true if memory hotplug is supported */
+ MemoryRegion io;
+ uint32_t selector; /* NOTE(review): presumably selects one devs[] entry - confirm */
+ uint32_t dev_count; /* NOTE(review): presumably the number of entries in devs - confirm */
+ MemStatus *devs;
+} MemHotplugState;
+
+void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner,
+ MemHotplugState *state);
+
+void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st,
+ DeviceState *dev, Error **errp);
+
+extern const VMStateDescription vmstate_memory_hotplug;
+#define VMSTATE_MEMORY_HOTPLUG(memhp, state) \
+ VMSTATE_STRUCT(memhp, state, 1, \
+ vmstate_memory_hotplug, MemHotplugState)
+
+void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list);
+#endif
diff --git a/include/hw/acpi/pc-hotplug.h b/include/hw/acpi/pc-hotplug.h
new file mode 100644
index 0000000000..bf5157d7c3
--- /dev/null
+++ b/include/hw/acpi/pc-hotplug.h
@@ -0,0 +1,56 @@
+/*
+ * QEMU ACPI hotplug utilities shared defines
+ *
+ * Copyright (C) 2014 Red Hat Inc
+ *
+ * Authors:
+ * Igor Mammedov <imammedo@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef PC_HOTPLUG_H
+#define PC_HOTPLUG_H
+
+/*
+ * ONLY DEFINEs are permitted in this file since it's shared
+ * between C and ASL code.
+ */
+#define ACPI_CPU_HOTPLUG_STATUS 4
+
+/* Limit for CPU arch IDs for CPU hotplug. All hotpluggable CPUs should
+ * have CPUClass.get_arch_id() < ACPI_CPU_HOTPLUG_ID_LIMIT.
+ */
+#define ACPI_CPU_HOTPLUG_ID_LIMIT 256
+
+/* 256 CPU IDs, 8 bits per entry: */
+#define ACPI_GPE_PROC_LEN 32
+
+#define ICH9_CPU_HOTPLUG_IO_BASE 0x0CD8
+#define PIIX4_CPU_HOTPLUG_IO_BASE 0xaf00
+
+#define ACPI_MEMORY_HOTPLUG_IO_LEN 24
+#define ACPI_MEMORY_HOTPLUG_BASE 0x0a00
+
+#define MEMORY_HOPTLUG_DEVICE MHPD
+#define MEMORY_SLOTS_NUMBER MDNR
+#define MEMORY_HOTPLUG_IO_REGION HPMR
+#define MEMORY_SLOT_ADDR_LOW MRBL
+#define MEMORY_SLOT_ADDR_HIGH MRBH
+#define MEMORY_SLOT_SIZE_LOW MRLL
+#define MEMORY_SLOT_SIZE_HIGH MRLH
+#define MEMORY_SLOT_PROXIMITY MPX
+#define MEMORY_SLOT_ENABLED MES
+#define MEMORY_SLOT_INSERT_EVENT MINS
+#define MEMORY_SLOT_SLECTOR MSEL
+#define MEMORY_SLOT_OST_EVENT MOEV
+#define MEMORY_SLOT_OST_STATUS MOSC
+#define MEMORY_SLOT_LOCK MLCK
+#define MEMORY_SLOT_STATUS_METHOD MRST
+#define MEMORY_SLOT_CRS_METHOD MCRS
+#define MEMORY_SLOT_OST_METHOD MOST
+#define MEMORY_SLOT_PROXIMITY_METHOD MPXM
+#define MEMORY_SLOT_NOTIFY_METHOD MTFY
+#define MEMORY_SLOT_SCAN_METHOD MSCN
+
+#endif
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 2d2e2bef19..605a970934 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -43,9 +43,13 @@ struct QEMUMachine {
const char *hw_version;
};
-#define TYPE_MACHINE_SUFFIX "-machine"
+void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
+ const char *name,
+ uint64_t ram_size);
+
int qemu_register_machine(QEMUMachine *m);
+#define TYPE_MACHINE_SUFFIX "-machine"
#define TYPE_MACHINE "machine"
#undef MACHINE /* BSD defines it and QEMU does not use it */
#define MACHINE(obj) \
@@ -61,6 +65,11 @@ extern MachineState *current_machine;
/**
* MachineClass:
* @qemu_machine: #QEMUMachine
+ * @get_hotplug_handler: this function is called during bus-less
+ * device hotplug. If defined it returns pointer to an instance
+ * of HotplugHandler object, which handles hotplug operation
+ * for a given @dev. It may return NULL if @dev doesn't require
+ * any actions to be performed by hotplug handler.
*/
struct MachineClass {
/*< private >*/
@@ -90,6 +99,9 @@ struct MachineClass {
const char *default_boot_order;
GlobalProperty *compat_props;
const char *hw_version;
+
+ HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
+ DeviceState *dev);
};
/**
@@ -113,6 +125,8 @@ struct MachineState {
char *firmware;
ram_addr_t ram_size;
+ ram_addr_t maxram_size;
+ uint64_t ram_slots;
const char *boot_order;
char *kernel_filename;
char *kernel_cmdline;
diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h
index e19143555e..59ea25b49a 100644
--- a/include/hw/i386/ich9.h
+++ b/include/hw/i386/ich9.h
@@ -24,7 +24,7 @@ I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base);
#define ICH9_CC_SIZE (16 * 1024) /* 16KB */
-#define TYPE_ICH9_LPC_DEVICE "ICH9 LPC"
+#define TYPE_ICH9_LPC_DEVICE "ICH9-LPC"
#define ICH9_LPC_DEVICE(obj) \
OBJECT_CHECK(ICH9LPCState, (obj), TYPE_ICH9_LPC_DEVICE)
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index ca7a0bdd1a..19f78ea336 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -3,6 +3,7 @@
#include "qemu-common.h"
#include "exec/memory.h"
+#include "hw/boards.h"
#include "hw/isa/isa.h"
#include "hw/block/fdc.h"
#include "net/net.h"
@@ -12,9 +13,57 @@
#include "qemu/bitmap.h"
#include "sysemu/sysemu.h"
#include "hw/pci/pci.h"
+#include "hw/boards.h"
#define HPET_INTCAP "hpet-intcap"
+/**
+ * PCMachineState:
+ * @hotplug_memory_base: address in guest RAM address space where hotplug memory
+ * address space begins.
+ * @hotplug_memory: hotplug memory address space container
+ * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling
+ */
+struct PCMachineState {
+ /*< private >*/
+ MachineState parent_obj;
+
+ /* <public> */
+ ram_addr_t hotplug_memory_base;
+ MemoryRegion hotplug_memory;
+
+ HotplugHandler *acpi_dev;
+};
+
+#define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
+#define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
+
+/**
+ * PCMachineClass:
+ * @get_hotplug_handler: pointer to parent class callback @get_hotplug_handler
+ */
+struct PCMachineClass {
+ /*< private >*/
+ MachineClass parent_class;
+
+ /*< public >*/
+ HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
+ DeviceState *dev);
+};
+
+typedef struct PCMachineState PCMachineState;
+typedef struct PCMachineClass PCMachineClass;
+
+#define TYPE_PC_MACHINE "generic-pc-machine"
+#define PC_MACHINE(obj) \
+ OBJECT_CHECK(PCMachineState, (obj), TYPE_PC_MACHINE)
+#define PC_MACHINE_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(PCMachineClass, (obj), TYPE_PC_MACHINE)
+#define PC_MACHINE_CLASS(klass) \
+ OBJECT_CLASS_CHECK(PCMachineClass, (klass), TYPE_PC_MACHINE)
+
+void qemu_register_pc_machine(QEMUMachine *m);
+
/* PC-style peripherals (also used by other machines). */
typedef struct PcPciInfo {
@@ -43,6 +92,7 @@ struct PcGuestInfo {
uint64_t *node_cpu;
FWCfgState *fw_cfg;
bool has_acpi_build;
+ bool has_reserved_memory;
};
/* parallel.c */
@@ -134,10 +184,8 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
MemoryRegion *pci_address_space);
-FWCfgState *pc_memory_init(MemoryRegion *system_memory,
- const char *kernel_filename,
- const char *kernel_cmdline,
- const char *initrd_filename,
+FWCfgState *pc_memory_init(MachineState *machine,
+ MemoryRegion *system_memory,
ram_addr_t below_4g_mem_size,
ram_addr_t above_4g_mem_size,
MemoryRegion *rom_memory,
@@ -167,7 +215,8 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name);
I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
qemu_irq sci_irq, qemu_irq smi_irq,
- int kvm_enabled, FWCfgState *fw_cfg);
+ int kvm_enabled, FWCfgState *fw_cfg,
+ DeviceState **piix4_pm);
void piix4_smbus_register_device(SMBusDevice *dev, uint8_t addr);
/* hpet.c */
@@ -243,7 +292,12 @@ int e820_get_num_entries(void);
bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
#define PC_Q35_COMPAT_2_0 \
- PC_COMPAT_2_0
+ PC_COMPAT_2_0, \
+ {\
+ .driver = "ICH9-LPC",\
+ .property = "memory-hotplug-support",\
+ .value = "off",\
+ }
#define PC_Q35_COMPAT_1_7 \
PC_COMPAT_1_7, \
@@ -272,10 +326,16 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
.property = "any_layout",\
.value = "off",\
},{\
+ .driver = "PIIX4_PM",\
+ .property = "memory-hotplug-support",\
+ .value = "off",\
+ },\
+ {\
.driver = "apic",\
.property = "version",\
.value = stringify(0x11),\
- },{\
+ },\
+ {\
.driver = "nec-usb-xhci",\
.property = "superspeed-ports-first",\
.value = "off",\
@@ -294,6 +354,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
.driver = "pci-serial-4x",\
.property = "prog_if",\
.value = stringify(0),\
+ },\
+ {\
+ .driver = "virtio-net-pci",\
+ .property = "guest_announce",\
+ .value = "off",\
}
#define PC_COMPAT_1_7 \
diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
new file mode 100644
index 0000000000..761eeef801
--- /dev/null
+++ b/include/hw/mem/pc-dimm.h
@@ -0,0 +1,81 @@
+/*
+ * PC DIMM device
+ *
+ * Copyright ProfitBricks GmbH 2012
+ * Copyright (C) 2013-2014 Red Hat Inc
+ *
+ * Authors:
+ * Vasilis Liaskovitis <vasilis.liaskovitis@profitbricks.com>
+ * Igor Mammedov <imammedo@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_PC_DIMM_H
+#define QEMU_PC_DIMM_H
+
+#include "exec/memory.h"
+#include "sysemu/hostmem.h"
+#include "hw/qdev.h"
+
+#define DEFAULT_PC_DIMMSIZE (1024*1024*1024)
+
+#define TYPE_PC_DIMM "pc-dimm"
+#define PC_DIMM(obj) \
+ OBJECT_CHECK(PCDIMMDevice, (obj), TYPE_PC_DIMM)
+#define PC_DIMM_CLASS(oc) \
+ OBJECT_CLASS_CHECK(PCDIMMDeviceClass, (oc), TYPE_PC_DIMM)
+#define PC_DIMM_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(PCDIMMDeviceClass, (obj), TYPE_PC_DIMM)
+
+#define PC_DIMM_ADDR_PROP "addr"
+#define PC_DIMM_SLOT_PROP "slot"
+#define PC_DIMM_NODE_PROP "node"
+#define PC_DIMM_SIZE_PROP "size"
+#define PC_DIMM_MEMDEV_PROP "memdev"
+
+#define PC_DIMM_UNASSIGNED_SLOT -1
+
+/**
+ * PCDIMMDevice:
+ * @addr: starting guest physical address, where @PCDIMMDevice is mapped.
+ * Default value: 0, means that address is auto-allocated.
+ * @node: numa node to which @PCDIMMDevice is attached.
+ * @slot: slot number into which @PCDIMMDevice is plugged in.
+ * Default value: -1, means that slot is auto-allocated.
+ * @hostmem: host memory backend providing memory for @PCDIMMDevice
+ */
+typedef struct PCDIMMDevice {
+ /* private */
+ DeviceState parent_obj;
+
+ /* public */
+ uint64_t addr;
+ uint32_t node;
+ int32_t slot;
+ HostMemoryBackend *hostmem;
+} PCDIMMDevice;
+
+/**
+ * PCDIMMDeviceClass:
+ * @get_memory_region: returns #MemoryRegion associated with @dimm
+ */
+typedef struct PCDIMMDeviceClass {
+ /* private */
+ DeviceClass parent_class;
+
+ /* public */
+ MemoryRegion *(*get_memory_region)(PCDIMMDevice *dimm);
+} PCDIMMDeviceClass;
+
+uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
+ uint64_t address_space_size,
+ uint64_t *hint, uint64_t size,
+ Error **errp);
+
+int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
+
+int qmp_pc_dimm_device_list(Object *obj, void *opaque);
+#endif
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
new file mode 100644
index 0000000000..d31768a1d4
--- /dev/null
+++ b/include/hw/virtio/vhost-backend.h
@@ -0,0 +1,38 @@
+/*
+ * vhost-backend
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_BACKEND_H_
+#define VHOST_BACKEND_H_
+
+typedef enum VhostBackendType {
+ VHOST_BACKEND_TYPE_NONE = 0,
+ VHOST_BACKEND_TYPE_KERNEL = 1,
+ VHOST_BACKEND_TYPE_USER = 2,
+ VHOST_BACKEND_TYPE_MAX = 3,
+} VhostBackendType;
+
+struct vhost_dev;
+
+typedef int (*vhost_call)(struct vhost_dev *dev, unsigned long int request,
+ void *arg);
+typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque);
+typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev);
+
+typedef struct VhostOps { /* Per-backend hooks that vhost.c reaches through dev->vhost_ops. */
+ VhostBackendType backend_type;
+ vhost_call vhost_call; /* receives the request/arg pairs vhost.c previously passed to ioctl() */
+ vhost_backend_init vhost_backend_init; /* invoked by vhost_dev_init() with its opaque argument */
+ vhost_backend_cleanup vhost_backend_cleanup; /* invoked by vhost_dev_cleanup() and on vhost_dev_init()'s fail path */
+} VhostOps;
+
+int vhost_set_backend_type(struct vhost_dev *dev,
+ VhostBackendType backend_type);
+
+#endif /* VHOST_BACKEND_H_ */
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index de24746c7e..33028ec8c2 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -2,6 +2,7 @@
#define VHOST_H
#include "hw/hw.h"
+#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "exec/memory.h"
@@ -25,11 +26,11 @@ typedef unsigned long vhost_log_chunk_t;
#define VHOST_LOG_PAGE 0x1000
#define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t))
#define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS)
+#define VHOST_INVALID_FEATURE_BIT (0xff)
struct vhost_memory;
struct vhost_dev {
MemoryListener memory_listener;
- int control;
struct vhost_memory *mem;
int n_mem_sections;
MemoryRegionSection *mem_sections;
@@ -48,10 +49,12 @@ struct vhost_dev {
bool memory_changed;
hwaddr mem_changed_start_addr;
hwaddr mem_changed_end_addr;
+ const VhostOps *vhost_ops;
+ void *opaque;
};
-int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
- bool force);
+int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
+ VhostBackendType backend_type, bool force);
void vhost_dev_cleanup(struct vhost_dev *hdev);
bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev);
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
@@ -68,4 +71,8 @@ bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n);
*/
void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
bool mask);
+unsigned vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
+ unsigned features);
+void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
+ unsigned features);
#endif
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 4b32440837..f7fccc08a4 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -49,12 +49,14 @@
#define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */
#define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */
#define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce itself */
#define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow
* Steering */
#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */
#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
+#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */
#define TX_TIMER_INTERVAL 150000 /* 150 us */
@@ -193,6 +195,8 @@ typedef struct VirtIONet {
char *netclient_name;
char *netclient_type;
uint64_t curr_guest_offloads;
+ QEMUTimer *announce_timer;
+ int announce_counter;
} VirtIONet;
#define VIRTIO_NET_CTRL_MAC 1
@@ -213,6 +217,18 @@ typedef struct VirtIONet {
#define VIRTIO_NET_CTRL_VLAN_DEL 1
/*
+ * Control link announce acknowledgement
+ *
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field requests link announcement from
+ * guest driver. The driver is notified by config space change interrupt. The
+ * command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that the driver has
+ * received the notification. It makes the device clear the bit
+ * VIRTIO_NET_S_ANNOUNCE in the status field.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE 3
+ #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
+
+/*
* Control Multiqueue
*
* The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
@@ -251,6 +267,7 @@ struct virtio_net_ctrl_mq {
DEFINE_PROP_BIT("guest_tso6", _state, _field, VIRTIO_NET_F_GUEST_TSO6, true), \
DEFINE_PROP_BIT("guest_ecn", _state, _field, VIRTIO_NET_F_GUEST_ECN, true), \
DEFINE_PROP_BIT("guest_ufo", _state, _field, VIRTIO_NET_F_GUEST_UFO, true), \
+ DEFINE_PROP_BIT("guest_announce", _state, _field, VIRTIO_NET_F_GUEST_ANNOUNCE, true), \
DEFINE_PROP_BIT("host_tso4", _state, _field, VIRTIO_NET_F_HOST_TSO4, true), \
DEFINE_PROP_BIT("host_tso6", _state, _field, VIRTIO_NET_F_HOST_TSO6, true), \
DEFINE_PROP_BIT("host_ecn", _state, _field, VIRTIO_NET_F_HOST_ECN, true), \
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 7e45048355..799d2d0f03 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -752,6 +752,8 @@ extern const VMStateInfo vmstate_info_bitmap;
#define VMSTATE_END_OF_LIST() \
{}
+#define SELF_ANNOUNCE_ROUNDS 5
+
int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
void *opaque, int version_id);
void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
@@ -778,4 +780,12 @@ void vmstate_register_ram(struct MemoryRegion *memory, DeviceState *dev);
void vmstate_unregister_ram(struct MemoryRegion *memory, DeviceState *dev);
void vmstate_register_ram_global(struct MemoryRegion *memory);
+/*
+ * Delay, in milliseconds, to wait before the self-announce attempt
+ * identified by @round.
+ *
+ * @round must satisfy 0 < @round < SELF_ANNOUNCE_ROUNDS.  The largest
+ * permitted value yields 50, and each smaller value adds another 100.
+ */
+static inline
+int64_t self_announce_delay(int round)
+{
+    int steps;
+
+    assert(round > 0 && round < SELF_ANNOUNCE_ROUNDS);
+
+    /* delay 50ms, 150ms, 250ms, ... */
+    steps = SELF_ANNOUNCE_ROUNDS - round - 1;
+    return 50 + steps * 100;
+}
+
#endif
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 1c1f56f36b..97696ea693 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -51,6 +51,7 @@ typedef enum MonitorEvent {
QEVENT_BLOCK_IMAGE_CORRUPTED,
QEVENT_QUORUM_FAILURE,
QEVENT_QUORUM_REPORT_BAD,
+ QEVENT_ACPI_OST,
/* Add to 'monitor_event_names' array in monitor.c when
* defining new events here */
diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
new file mode 100644
index 0000000000..85109f63aa
--- /dev/null
+++ b/include/net/vhost-user.h
@@ -0,0 +1,17 @@
+/*
+ * vhost-user.h
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_USER_H_
+#define VHOST_USER_H_
+
+struct vhost_net;
+struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
+
+#endif /* VHOST_USER_H_ */
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 2d936bb5f5..b1c18a3f3b 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -2,11 +2,19 @@
#define VHOST_NET_H
#include "net/net.h"
+#include "hw/virtio/vhost-backend.h"
struct vhost_net;
typedef struct vhost_net VHostNetState;
-VHostNetState *vhost_net_init(NetClientState *backend, int devfd, bool force);
+typedef struct VhostNetOptions { /* argument bundle taken by vhost_net_init() */
+ VhostBackendType backend_type; /* callers set VHOST_BACKEND_TYPE_KERNEL (tap) or VHOST_BACKEND_TYPE_USER (vhost-user) */
+ NetClientState *net_backend; /* the caller's own net client (tap and vhost-user both pass &s->nc) */
+ void *opaque; /* backend specific: tap passes the vhost fd cast through uintptr_t, vhost-user passes its CharDriverState */
+ bool force; /* the caller's "vhostforce" setting */
+} VhostNetOptions;
+
+struct vhost_net *vhost_net_init(VhostNetOptions *options);
bool vhost_net_query(VHostNetState *net, VirtIODevice *dev);
int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, int total_queues);
@@ -20,4 +28,5 @@ void vhost_net_ack_features(VHostNetState *net, unsigned features);
bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask);
+VHostNetState *get_vhost_net(NetClientState *nc);
#endif
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index ffb296692d..6d35c1bcba 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -116,6 +116,16 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#else
#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
#endif
+#ifdef MADV_UNMERGEABLE
+#define QEMU_MADV_UNMERGEABLE MADV_UNMERGEABLE
+#else
+#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
+#endif
+#ifdef MADV_DODUMP
+#define QEMU_MADV_DODUMP MADV_DODUMP
+#else
+#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
+#endif
#ifdef MADV_DONTDUMP
#define QEMU_MADV_DONTDUMP MADV_DONTDUMP
#else
@@ -133,6 +143,8 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED
#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
@@ -142,6 +154,8 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#define QEMU_MADV_DONTNEED QEMU_MADV_INVALID
#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
@@ -251,4 +265,6 @@ void qemu_init_auxval(char **envp);
void qemu_set_tty_echo(int fd, bool echo);
+void os_mem_prealloc(int fd, char *area, size_t sz);
+
#endif
diff --git a/include/qemu/range.h b/include/qemu/range.h
index aae9720161..cfa021fd48 100644
--- a/include/qemu/range.h
+++ b/include/qemu/range.h
@@ -3,6 +3,7 @@
#include <inttypes.h>
#include <qemu/typedefs.h>
+#include "qemu/queue.h"
/*
* Operations on 64 bit address ranges.
@@ -60,4 +61,75 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1,
return !(last2 < first1 || last1 < first2);
}
+/*
+ * Tell whether two ranges can be combined into one.
+ *
+ * This is looser than overlapping: 0,1 can merge with 1,2 but don't overlap.
+ */
+static inline bool ranges_can_merge(Range *range1, Range *range2)
+{
+    bool first_ends_early = range1->end < range2->begin;
+    bool second_ends_early = range2->end < range1->begin;
+
+    return !first_ends_early && !second_ends_early;
+}
+
+/*
+ * Extend @range1 so that it also covers @range2.
+ *
+ * Returns 0 when the ranges could be merged (see ranges_can_merge()), or
+ * -1 when they could not, in which case @range1 is left untouched.
+ * @range2 is never modified.
+ */
+static inline int range_merge(Range *range1, Range *range2)
+{
+    if (!ranges_can_merge(range1, range2)) {
+        return -1;
+    }
+
+    if (range2->begin < range1->begin) {
+        range1->begin = range2->begin;
+    }
+    if (range2->end > range1->end) {
+        range1->end = range2->end;
+    }
+
+    return 0;
+}
+/*
+ * Add the Range pointed to by @data to @list, merging where possible.
+ *
+ * An empty @list gets @data inserted with g_list_insert_sorted() and @func.
+ * Otherwise the list is walked from its head: the first element for which
+ * ranges_can_merge() holds absorbs @data through range_merge(), and that
+ * element then keeps absorbing the element that follows it for as long as
+ * the two can be merged.  If the walk reaches the end of the list without
+ * any merge, @data is inserted with g_list_insert_sorted().
+ *
+ * Returns the resulting list head.
+ *
+ * NOTE(review): links taken out with g_list_remove_link() are not freed
+ * here, and neither is their Range data -- confirm callers expect this.
+ */
+static inline GList *g_list_insert_sorted_merged(GList *list,
+ gpointer data,
+ GCompareFunc func)
+{
+ GList *l, *next = NULL;
+ Range *r, *nextr;
+
+ if (!list) {
+ list = g_list_insert_sorted(list, data, func);
+ return list;
+ }
+
+ nextr = data; /* the first merge candidate is the new range itself */
+ l = list;
+ while (l && l != next && nextr) {
+ r = l->data;
+ if (ranges_can_merge(r, nextr)) {
+ range_merge(r, nextr);
+ l = g_list_remove_link(l, next); /* next is still NULL on the first merge */
+ next = g_list_next(l); /* the successor becomes the next merge candidate */
+ if (next) {
+ nextr = next->data;
+ } else {
+ nextr = NULL; /* no successor left: ends the loop */
+ }
+ } else {
+ l = g_list_next(l); /* after a merge this equals next, which ends the loop */
+ }
+ }
+
+ if (!l) { /* walked off the end without merging anything */
+ list = g_list_insert_sorted(list, data, func);
+ }
+
+ return list;
+}
+
+/*
+ * Three-way comparison of two Range objects, with the signature of a
+ * GCompareFunc.
+ *
+ * Returns 0 when both begin and end match.  Otherwise returns -1 if
+ * range_get_last(begin, end) of @a is smaller than that of @b, else 1.
+ */
+static inline gint range_compare(gconstpointer a, gconstpointer b)
+{
+    Range *lhs = (Range *)a;
+    Range *rhs = (Range *)b;
+
+    if (lhs->begin == rhs->begin && lhs->end == rhs->end) {
+        return 0;
+    }
+
+    return (range_get_last(lhs->begin, lhs->end) <
+            range_get_last(rhs->begin, rhs->end)) ? -1 : 1;
+}
+
#endif
diff --git a/include/qom/object.h b/include/qom/object.h
index a641dcde10..b882ccc85f 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -917,6 +917,34 @@ int64_t object_property_get_int(Object *obj, const char *name,
Error **errp);
/**
+ * object_property_get_enum:
+ * @obj: the object
+ * @name: the name of the property
+ * @strings: strings corresponding to enums
+ * @errp: returns an error if this function fails
+ *
+ * Returns: the value of the property, converted to an integer, or
+ * undefined if an error occurs (including when the property value is not
+ * an enum).
+ */
+int object_property_get_enum(Object *obj, const char *name,
+ const char *strings[], Error **errp);
+
+/**
+ * object_property_get_uint16List:
+ * @obj: the object
+ * @name: the name of the property
+ * @list: the returned int list
+ * @errp: returns an error if this function fails
+ *
+ * Returns: the value of the property, converted to integers, or
+ * undefined if an error occurs (including when the property value is not
+ * a list of integers).
+ */
+void object_property_get_uint16List(Object *obj, const char *name,
+ uint16List **list, Error **errp);
+
+/**
* object_property_set:
* @obj: the object
* @v: the visitor that will be used to write the property value. This should
diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index 7f5eeb38b0..3b835f6fb3 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h
@@ -56,10 +56,13 @@ typedef void IOEventHandler(void *opaque, int event);
struct CharDriverState {
void (*init)(struct CharDriverState *s);
int (*chr_write)(struct CharDriverState *s, const uint8_t *buf, int len);
+ int (*chr_sync_read)(struct CharDriverState *s,
+ const uint8_t *buf, int len);
GSource *(*chr_add_watch)(struct CharDriverState *s, GIOCondition cond);
void (*chr_update_read_handler)(struct CharDriverState *s);
int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg);
- int (*get_msgfd)(struct CharDriverState *s);
+ int (*get_msgfds)(struct CharDriverState *s, int* fds, int num);
+ int (*set_msgfds)(struct CharDriverState *s, int *fds, int num);
int (*chr_add_client)(struct CharDriverState *chr, int fd);
IOEventHandler *chr_event;
IOCanReadHandler *chr_can_read;
@@ -80,6 +83,7 @@ struct CharDriverState {
int avail_connections;
int is_mux;
guint fd_in_tag;
+ guint fd_hup_tag;
QemuOpts *opts;
QTAILQ_ENTRY(CharDriverState) next;
};
@@ -189,6 +193,18 @@ int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len);
int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len);
/**
+ * @qemu_chr_fe_read_all:
+ *
+ * Read data to a buffer from the back end.
+ *
+ * @buf the data buffer
+ * @len the number of bytes to read
+ *
+ * Returns: the number of bytes read
+ */
+int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len);
+
+/**
* @qemu_chr_fe_ioctl:
*
* Issue a device specific ioctl to a backend.
@@ -215,6 +231,32 @@ int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg);
int qemu_chr_fe_get_msgfd(CharDriverState *s);
/**
+ * @qemu_chr_fe_get_msgfds:
+ *
+ * For backends capable of fd passing, return the number of received file
+ * descriptors and fill the fds array with up to num elements.
+ *
+ * Returns: -1 if fd passing isn't supported or there are no pending file
+ * descriptors. If file descriptors are returned, subsequent calls to
+ * this function will return -1 until a client sends a new set of file
+ * descriptors.
+ */
+int qemu_chr_fe_get_msgfds(CharDriverState *s, int *fds, int num);
+
+/**
+ * @qemu_chr_fe_set_msgfds:
+ *
+ * For backends capable of fd passing, set an array of fds to be passed with
+ * the next send operation.
+ * A subsequent call to this function before calling a write function will
+ * result in overwriting the fd array with the new value without it being sent.
+ * Upon writing the message the fd array is freed.
+ *
+ * Returns: -1 if fd passing isn't supported.
+ */
+int qemu_chr_fe_set_msgfds(CharDriverState *s, int *fds, int num);
+
+/**
* @qemu_chr_fe_claim:
*
* Claim a backend before using it, should be called before calling
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index 6502488a05..4f790810bf 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -23,7 +23,6 @@ extern int smp_threads;
#define smp_threads 1
#endif
-void set_numa_modes(void);
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg);
#endif
diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
new file mode 100644
index 0000000000..1ce439415d
--- /dev/null
+++ b/include/sysemu/hostmem.h
@@ -0,0 +1,68 @@
+/*
+ * QEMU Host Memory Backend
+ *
+ * Copyright (C) 2013-2014 Red Hat Inc
+ *
+ * Authors:
+ * Igor Mammedov <imammedo@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_RAM_H
+#define QEMU_RAM_H
+
+#include "sysemu/sysemu.h" /* for MAX_NODES */
+#include "qom/object.h"
+#include "qapi/error.h"
+#include "exec/memory.h"
+#include "qemu/option.h"
+#include "qemu/bitmap.h"
+
+#define TYPE_MEMORY_BACKEND "memory-backend"
+#define MEMORY_BACKEND(obj) \
+ OBJECT_CHECK(HostMemoryBackend, (obj), TYPE_MEMORY_BACKEND)
+#define MEMORY_BACKEND_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(HostMemoryBackendClass, (obj), TYPE_MEMORY_BACKEND)
+#define MEMORY_BACKEND_CLASS(klass) \
+ OBJECT_CLASS_CHECK(HostMemoryBackendClass, (klass), TYPE_MEMORY_BACKEND)
+
+typedef struct HostMemoryBackend HostMemoryBackend;
+typedef struct HostMemoryBackendClass HostMemoryBackendClass;
+
+/**
+ * HostMemoryBackendClass:
+ * @parent_class: opaque parent class container
+ * @alloc: class hook taking the backend and an Error ** for reporting
+ *         failure.  NOTE(review): presumably where a concrete backend
+ *         allocates its memory -- confirm against the implementations.
+ */
+struct HostMemoryBackendClass {
+ ObjectClass parent_class;
+
+ void (*alloc)(HostMemoryBackend *backend, Error **errp);
+};
+
+/**
+ * @HostMemoryBackend
+ *
+ * @parent: opaque parent object container
+ * @size: amount of memory backend provides
+ * @merge, @dump: boolean options.  NOTE(review): presumably the
+ *                mergeable / include-in-core-dump settings -- confirm.
+ * @prealloc, @force_prealloc: boolean options.  NOTE(review): presumably
+ *                control preallocation of the memory -- confirm.
+ * @host_nodes: bitmap declared with MAX_NODES + 1 bits
+ * @policy: a HostMemPolicy value
+ * @mr: MemoryRegion representing host memory belonging to backend
+ *
+ * The backend's identification string in the memdev namespace is not
+ * stored in this struct.
+ */
+struct HostMemoryBackend {
+ /* private */
+ Object parent;
+
+ /* protected */
+ uint64_t size;
+ bool merge, dump;
+ bool prealloc, force_prealloc;
+ DECLARE_BITMAP(host_nodes, MAX_NODES + 1);
+ HostMemPolicy policy;
+
+ MemoryRegion mr;
+};
+
+MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend,
+ Error **errp);
+
+#endif
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index e79e92c50e..c4556ad59e 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -43,6 +43,7 @@ extern bool kvm_allowed;
extern bool kvm_kernel_irqchip;
extern bool kvm_async_interrupts_allowed;
extern bool kvm_halt_in_kernel_allowed;
+extern bool kvm_eventfds_allowed;
extern bool kvm_irqfds_allowed;
extern bool kvm_msi_via_irqfd_allowed;
extern bool kvm_gsi_routing_allowed;
@@ -83,6 +84,15 @@ extern bool kvm_readonly_mem_allowed;
#define kvm_halt_in_kernel() (kvm_halt_in_kernel_allowed)
/**
+ * kvm_eventfds_enabled:
+ *
+ * Returns: true if we can use eventfds to receive notifications
+ * from a KVM CPU (ie the kernel supports eventfds and we are running
+ * with a configuration where it is meaningful to use them).
+ */
+#define kvm_eventfds_enabled() (kvm_eventfds_allowed)
+
+/**
* kvm_irqfds_enabled:
*
* Returns: true if we can use irqfds to inject interrupts into
@@ -128,6 +138,7 @@ extern bool kvm_readonly_mem_allowed;
#define kvm_irqchip_in_kernel() (false)
#define kvm_async_interrupts_enabled() (false)
#define kvm_halt_in_kernel() (false)
+#define kvm_eventfds_enabled() (false)
#define kvm_irqfds_enabled() (false)
#define kvm_msi_via_irqfd_enabled() (false)
#define kvm_gsi_routing_allowed() (false)
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index bf8523ada1..af3fbc47d8 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -89,6 +89,8 @@ static inline void os_setup_post(void) {}
void os_set_line_buffering(void);
static inline void os_set_proc_name(const char *dummy) {}
+size_t getpagesize(void);
+
#if !defined(EPROTONOSUPPORT)
# define EPROTONOSUPPORT EINVAL
#endif
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index ba5c7f8093..277230db49 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -9,6 +9,8 @@
#include "qapi-types.h"
#include "qemu/notify.h"
#include "qemu/main-loop.h"
+#include "qemu/bitmap.h"
+#include "qom/object.h"
/* vl.c */
@@ -131,8 +133,10 @@ extern uint8_t *boot_splash_filedata;
extern size_t boot_splash_filedata_size;
extern uint8_t qemu_extra_params_fw[2];
extern QEMUClockType rtc_clock;
+extern const char *mem_path;
+extern int mem_prealloc;
-#define MAX_NODES 64
+#define MAX_NODES 128
/* The following shall be true for all CPUs:
* cpu->cpu_index < max_cpus <= MAX_CPUMASK_BITS
@@ -142,8 +146,16 @@ extern QEMUClockType rtc_clock;
#define MAX_CPUMASK_BITS 255
extern int nb_numa_nodes;
-extern uint64_t node_mem[MAX_NODES];
-extern unsigned long *node_cpumask[MAX_NODES];
+typedef struct node_info { /* per-NUMA-node settings, one entry of numa_info[] */
+ uint64_t node_mem; /* node memory size; the monitor prints node_mem >> 20 as MB */
+ DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); /* NOTE(review): presumably the CPUs assigned to the node -- confirm */
+ struct HostMemoryBackend *node_memdev; /* NOTE(review): presumably the backend given for this node, if any -- confirm */
+} NodeInfo;
+extern NodeInfo numa_info[MAX_NODES];
+void set_numa_nodes(void);
+void set_numa_modes(void);
+extern QemuOptsList qemu_numa_opts;
+int numa_init_func(QemuOpts *opts, void *opaque);
#define MAX_OPTION_ROMS 16
typedef struct QEMUOptionRom {
diff --git a/kvm-all.c b/kvm-all.c
index ef9f0f2213..0f0abfeb24 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -113,6 +113,7 @@ KVMState *kvm_state;
bool kvm_kernel_irqchip;
bool kvm_async_interrupts_allowed;
bool kvm_halt_in_kernel_allowed;
+bool kvm_eventfds_allowed;
bool kvm_irqfds_allowed;
bool kvm_msi_via_irqfd_allowed;
bool kvm_gsi_routing_allowed;
@@ -1541,6 +1542,9 @@ int kvm_init(MachineClass *mc)
(kvm_check_extension(s, KVM_CAP_READONLY_MEM) > 0);
#endif
+ kvm_eventfds_allowed =
+ (kvm_check_extension(s, KVM_CAP_IOEVENTFD) > 0);
+
ret = kvm_arch_init(s);
if (ret < 0) {
goto err;
diff --git a/kvm-stub.c b/kvm-stub.c
index ac33d8666d..8e7737caa9 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -22,6 +22,7 @@
KVMState *kvm_state;
bool kvm_kernel_irqchip;
bool kvm_async_interrupts_allowed;
+bool kvm_eventfds_allowed;
bool kvm_irqfds_allowed;
bool kvm_msi_via_irqfd_allowed;
bool kvm_gsi_routing_allowed;
diff --git a/memory.c b/memory.c
index 4895e25376..b91a60a921 100644
--- a/memory.c
+++ b/memory.c
@@ -23,6 +23,7 @@
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
+#include "sysemu/sysemu.h"
//#define DEBUG_UNASSIGNED
@@ -493,7 +494,7 @@ static AddressSpace *memory_region_to_address_space(MemoryRegion *mr)
return as;
}
}
- abort();
+ return NULL;
}
/* Render a memory region into the global view. Ranges in @view obscure
@@ -1032,6 +1033,23 @@ void memory_region_init_ram(MemoryRegion *mr,
mr->ram_addr = qemu_ram_alloc(size, mr);
}
+#ifdef __linux__ /* the file-backed RAM initializer is only compiled on Linux */
+void memory_region_init_ram_from_file(MemoryRegion *mr, /* region to initialize */
+ struct Object *owner, /* forwarded to memory_region_init() */
+ const char *name, /* forwarded to memory_region_init() */
+ uint64_t size, /* region size, also the size requested from the allocator */
+ bool share, /* forwarded to qemu_ram_alloc_from_file() */
+ const char *path, /* forwarded to qemu_ram_alloc_from_file() */
+ Error **errp) /* receives any error from qemu_ram_alloc_from_file() */
+{
+ memory_region_init(mr, owner, name, size);
+ mr->ram = true; /* mark the region as RAM */
+ mr->terminates = true;
+ mr->destructor = memory_region_destructor_ram;
+ mr->ram_addr = qemu_ram_alloc_from_file(size, mr, share, path, errp); /* NOTE(review): errp is not inspected here; callers must check it */
+}
+#endif
+
void memory_region_init_ram_ptr(MemoryRegion *mr,
Object *owner,
const char *name,
@@ -1254,6 +1272,17 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
cpu_physical_memory_reset_dirty(mr->ram_addr + addr, size, client);
}
+/*
+ * Return what qemu_get_ram_fd() reports for the RAM behind @mr.
+ *
+ * Alias regions are resolved to the region they ultimately alias first;
+ * that region must have its terminates flag set.
+ */
+int memory_region_get_fd(MemoryRegion *mr)
+{
+    /* Follow the alias chain down to the region that is not an alias. */
+    while (mr->alias) {
+        mr = mr->alias;
+    }
+
+    assert(mr->terminates);
+
+    return qemu_get_ram_fd(mr->ram_addr & TARGET_PAGE_MASK);
+}
+
void *memory_region_get_ram_ptr(MemoryRegion *mr)
{
if (mr->alias) {
@@ -1593,6 +1622,11 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr)
return true;
}
+/* Report whether @mr currently has a container region. */
+bool memory_region_is_mapped(MemoryRegion *mr)
+{
+    return mr->container != NULL;
+}
+
MemoryRegionSection memory_region_find(MemoryRegion *mr,
hwaddr addr, uint64_t size)
{
@@ -1610,6 +1644,9 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
}
as = memory_region_to_address_space(root);
+ if (!as) {
+ return ret;
+ }
range = addrrange_make(int128_make64(addr), int128_make64(size));
view = address_space_get_flatview(as);
diff --git a/monitor.c b/monitor.c
index 2901187f5f..c7f879713e 100644
--- a/monitor.c
+++ b/monitor.c
@@ -487,6 +487,7 @@ static const char *monitor_event_names[] = {
[QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED",
[QEVENT_QUORUM_FAILURE] = "QUORUM_FAILURE",
[QEVENT_QUORUM_REPORT_BAD] = "QUORUM_REPORT_BAD",
+ [QEVENT_ACPI_OST] = "ACPI_DEVICE_OST",
};
QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)
@@ -2011,7 +2012,7 @@ static void do_info_numa(Monitor *mon, const QDict *qdict)
}
monitor_printf(mon, "\n");
monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i,
- node_mem[i] >> 20);
+ numa_info[i].node_mem >> 20);
}
}
@@ -2964,6 +2965,13 @@ static mon_cmd_t info_cmds[] = {
.mhandler.cmd = hmp_info_tpm,
},
{
+ .name = "memdev",
+ .args_type = "",
+ .params = "",
+ .help = "show the memory device",
+ .mhandler.cmd = hmp_info_memdev,
+ },
+ {
.name = NULL,
},
};
diff --git a/net/Makefile.objs b/net/Makefile.objs
index c25fe6920c..301f6b6b51 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,7 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
common-obj-y += socket.o
common-obj-y += dump.o
common-obj-y += eth.o
-common-obj-$(CONFIG_POSIX) += tap.o
+common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o
common-obj-$(CONFIG_LINUX) += tap-linux.o
common-obj-$(CONFIG_WIN32) += tap-win32.o
common-obj-$(CONFIG_BSD) += tap-bsd.o
diff --git a/net/clients.h b/net/clients.h
index 7322ff5f33..7f3d4ae9f3 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -57,4 +57,7 @@ int net_init_netmap(const NetClientOptions *opts, const char *name,
NetClientState *peer);
#endif
+int net_init_vhost_user(const NetClientOptions *opts, const char *name,
+ NetClientState *peer);
+
#endif /* QEMU_NET_CLIENTS_H */
diff --git a/net/hub.c b/net/hub.c
index 33a99c99ef..7e0f2d6f0d 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -322,6 +322,7 @@ void net_hub_check_clients(void)
case NET_CLIENT_OPTIONS_KIND_TAP:
case NET_CLIENT_OPTIONS_KIND_SOCKET:
case NET_CLIENT_OPTIONS_KIND_VDE:
+ case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
has_host_dev = 1;
break;
default:
diff --git a/net/net.c b/net/net.c
index 6344160403..3dac29b844 100644
--- a/net/net.c
+++ b/net/net.c
@@ -62,6 +62,7 @@ const char *host_net_devices[] = {
#ifdef CONFIG_VDE
"vde",
#endif
+ "vhost-user",
NULL,
};
@@ -802,6 +803,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
[NET_CLIENT_OPTIONS_KIND_BRIDGE] = net_init_bridge,
#endif
[NET_CLIENT_OPTIONS_KIND_HUBPORT] = net_init_hubport,
+#ifdef CONFIG_VHOST_NET_USED
+ [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
+#endif
};
@@ -835,6 +839,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
case NET_CLIENT_OPTIONS_KIND_BRIDGE:
#endif
case NET_CLIENT_OPTIONS_KIND_HUBPORT:
+#ifdef CONFIG_VHOST_NET_USED
+ case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
+#endif
break;
default:
diff --git a/net/tap.c b/net/tap.c
index fc1b865e08..a40f7f023f 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -594,6 +594,7 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
int vnet_hdr, int fd)
{
TAPState *s;
+ int vhostfd;
s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
if (!s) {
@@ -624,7 +625,11 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
if (tap->has_vhost ? tap->vhost :
vhostfdname || (tap->has_vhostforce && tap->vhostforce)) {
- int vhostfd;
+ VhostNetOptions options;
+
+ options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
+ options.net_backend = &s->nc;
+ options.force = tap->has_vhostforce && tap->vhostforce;
if (tap->has_vhostfd || tap->has_vhostfds) {
vhostfd = monitor_handle_fd_param(cur_mon, vhostfdname);
@@ -632,11 +637,16 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
return -1;
}
} else {
- vhostfd = -1;
+ vhostfd = open("/dev/vhost-net", O_RDWR);
+ if (vhostfd < 0) {
+ error_report("tap: open vhost char device failed: %s",
+ strerror(errno));
+ return -1;
+ }
}
+ options.opaque = (void *)(uintptr_t)vhostfd;
- s->vhost_net = vhost_net_init(&s->nc, vhostfd,
- tap->has_vhostforce && tap->vhostforce);
+ s->vhost_net = vhost_net_init(&options);
if (!s->vhost_net) {
error_report("vhost-net requested but could not be initialized");
return -1;
diff --git a/net/vhost-user.c b/net/vhost-user.c
new file mode 100644
index 0000000000..24e050c772
--- /dev/null
+++ b/net/vhost-user.c
@@ -0,0 +1,258 @@
+/*
+ * vhost-user.c
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "clients.h"
+#include "net/vhost_net.h"
+#include "net/vhost-user.h"
+#include "sysemu/char.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+
+typedef struct VhostUserState { /* state of one vhost-user net client */
+ NetClientState nc; /* embedded net client; DO_UPCAST() maps it back to this struct */
+ CharDriverState *chr; /* chardev handed to vhost_net_init() as options.opaque */
+ bool vhostforce; /* handed to vhost_net_init() as options.force */
+ VHostNetState *vhost_net; /* set by vhost_user_start(), cleared by vhost_user_stop() */
+} VhostUserState;
+
+typedef struct VhostUserChardevProps { /* filled in by net_vhost_chardev_opts() */
+ bool is_socket; /* option "backend" has the value "socket" */
+ bool is_unix; /* an option named "path" is present */
+ bool is_server; /* an option named "server" is present */
+} VhostUserChardevProps;
+
+/*
+ * Return the vhost_net instance stored in the vhost-user net client @nc.
+ * @nc must be of type NET_CLIENT_OPTIONS_KIND_VHOST_USER.
+ */
+VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
+{
+    VhostUserState *state;
+
+    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+
+    state = DO_UPCAST(VhostUserState, nc, nc);
+    return state->vhost_net;
+}
+
+/* Return 1 when @s currently holds a vhost_net instance, 0 otherwise. */
+static int vhost_user_running(VhostUserState *s)
+{
+    return s->vhost_net != NULL;
+}
+
+/*
+ * Make sure @s has a vhost_net instance, creating one with
+ * vhost_net_init() if it has none yet.
+ *
+ * Returns 0 when an instance exists afterwards, -1 otherwise.
+ */
+static int vhost_user_start(VhostUserState *s)
+{
+    VhostNetOptions options = {
+        .backend_type = VHOST_BACKEND_TYPE_USER,
+        .net_backend = &s->nc,
+        .opaque = s->chr,
+        .force = s->vhostforce,
+    };
+
+    if (!vhost_user_running(s)) {
+        s->vhost_net = vhost_net_init(&options);
+    }
+
+    return vhost_user_running(s) ? 0 : -1;
+}
+
+/* Clean up the vhost_net instance of @s, if any, and forget it. */
+static void vhost_user_stop(VhostUserState *s)
+{
+    if (s->vhost_net) {
+        vhost_net_cleanup(s->vhost_net);
+    }
+
+    s->vhost_net = NULL;
+}
+
+/*
+ * .cleanup callback of net_vhost_user_info: stop the vhost backend of
+ * @nc, then purge the packets still queued for it.
+ */
+static void vhost_user_cleanup(NetClientState *nc)
+{
+    vhost_user_stop(DO_UPCAST(VhostUserState, nc, nc));
+    qemu_purge_queued_packets(nc);
+}
+/*
+ * .has_vnet_hdr callback of net_vhost_user_info.  @nc must be a vhost-user
+ * client; the answer is unconditionally true.
+ */
+static bool vhost_user_has_vnet_hdr(NetClientState *nc)
+{
+ assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+
+ return true;
+}
+/*
+ * .has_ufo callback of net_vhost_user_info.  @nc must be a vhost-user
+ * client; the answer is unconditionally true.
+ */
+static bool vhost_user_has_ufo(NetClientState *nc)
+{
+ assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+
+ return true;
+}
+/*
+ * NetClientInfo describing vhost-user clients; passed to
+ * qemu_new_net_client() by net_vhost_user_init().  No receive callback is
+ * set here.
+ */
+static NetClientInfo net_vhost_user_info = {
+ .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER,
+ .size = sizeof(VhostUserState), /* size of the state embedding the NetClientState */
+ .cleanup = vhost_user_cleanup,
+ .has_vnet_hdr = vhost_user_has_vnet_hdr,
+ .has_ufo = vhost_user_has_ufo,
+};
+
+/*
+ * Set the link_down flag of the vhost-user client @s and of its peer (if
+ * it has one) to @link_down, then invoke the link_status_changed callback
+ * of each of them where one is provided.
+ */
+static void net_vhost_link_down(VhostUserState *s, bool link_down)
+{
+    NetClientState *nc = &s->nc;
+
+    nc->link_down = link_down;
+    if (nc->peer) {
+        nc->peer->link_down = link_down;
+    }
+
+    if (nc->info->link_status_changed) {
+        nc->info->link_status_changed(nc);
+    }
+
+    /* The peer pointer is read again after the callback above has run. */
+    if (nc->peer && nc->peer->info->link_status_changed) {
+        nc->peer->info->link_status_changed(nc->peer);
+    }
+}
+
+/*
+ * Chardev event handler installed by net_vhost_user_init().
+ *
+ * @opaque is the VhostUserState given to qemu_chr_add_handlers().
+ * CHR_EVENT_OPENED starts the vhost backend and brings the link up;
+ * CHR_EVENT_CLOSED takes the link down and stops the backend.  All other
+ * events are ignored.
+ */
+static void net_vhost_user_event(void *opaque, int event)
+{
+    VhostUserState *s = opaque;
+
+    switch (event) {
+    case CHR_EVENT_OPENED:
+        /* NOTE(review): a vhost_user_start() failure is not acted upon. */
+        vhost_user_start(s);
+        net_vhost_link_down(s, false);
+        /* error_report() appends the newline itself; none in the format. */
+        error_report("chardev \"%s\" went up", s->chr->label);
+        break;
+    case CHR_EVENT_CLOSED:
+        net_vhost_link_down(s, true);
+        vhost_user_stop(s);
+        error_report("chardev \"%s\" went down", s->chr->label);
+        break;
+    }
+}
+
+/*
+ * Create the vhost-user net client @name of model @device, attach it to
+ * @peer, and register net_vhost_user_event() as the event handler of
+ * @chr.  @chr and @vhostforce are recorded in the client state.
+ *
+ * Always returns 0.
+ */
+static int net_vhost_user_init(NetClientState *peer, const char *device,
+                               const char *name, CharDriverState *chr,
+                               bool vhostforce)
+{
+    NetClientState *client = qemu_new_net_client(&net_vhost_user_info, peer,
+                                                 device, name);
+    VhostUserState *state = DO_UPCAST(VhostUserState, nc, client);
+
+    snprintf(client->info_str, sizeof(client->info_str), "vhost-user to %s",
+             chr->label);
+
+    /* We don't provide a receive callback */
+    client->receive_disabled = 1;
+    state->chr = chr;
+    state->vhostforce = vhostforce;
+
+    qemu_chr_add_handlers(chr, NULL, NULL, net_vhost_user_event, state);
+
+    return 0;
+}
+
+/*
+ * Per-option callback used with qemu_opt_foreach() on a chardev's options.
+ *
+ * @opaque points to a VhostUserChardevProps that records which of the
+ * accepted options were seen: "backend" with the value "socket", "path"
+ * and "server".  Any other option (including "backend" with a different
+ * value) is reported as unsupported.
+ *
+ * Returns 0 for an accepted option, -1 otherwise.
+ */
+static int net_vhost_chardev_opts(const char *name, const char *value,
+                                  void *opaque)
+{
+    VhostUserChardevProps *props = opaque;
+
+    if (strcmp(name, "backend") == 0 && strcmp(value, "socket") == 0) {
+        props->is_socket = true;
+    } else if (strcmp(name, "path") == 0) {
+        props->is_unix = true;
+    } else if (strcmp(name, "server") == 0) {
+        props->is_server = true;
+    } else {
+        /* Kept on a single line: error_report() messages carry no newline. */
+        error_report("vhost-user does not support a chardev with option %s=%s",
+                     name, value);
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Look up the chardev named by @opts->chardev and check that its options
+ * describe a unix socket (backend "socket" plus a "path").
+ *
+ * On success the chardev is claimed with qemu_chr_fe_claim_no_fail() and
+ * returned.  On any failure NULL is returned; a missing chardev and a
+ * chardev that is not a unix socket are reported here.
+ */
+static CharDriverState *net_vhost_parse_chardev(const NetdevVhostUserOptions *opts)
+{
+    VhostUserChardevProps props = { 0 };
+    CharDriverState *chr;
+
+    chr = qemu_chr_find(opts->chardev);
+    if (!chr) {
+        error_report("chardev \"%s\" not found", opts->chardev);
+        return NULL;
+    }
+
+    /* inspect chardev opts */
+    if (qemu_opt_foreach(chr->opts, net_vhost_chardev_opts, &props, true) != 0) {
+        return NULL;
+    }
+
+    if (!(props.is_socket && props.is_unix)) {
+        error_report("chardev \"%s\" is not a unix socket",
+                     opts->chardev);
+        return NULL;
+    }
+
+    qemu_chr_fe_claim_no_fail(chr);
+
+    return chr;
+}
+
+/*
+ * Per-entry callback used with qemu_opts_foreach() on the "device"
+ * options.  @opaque is the name of the vhost-user netdev.
+ *
+ * Entries lacking a "driver" or "netdev" option, or whose "netdev" is a
+ * different one, are accepted.  An entry using this netdev must have a
+ * driver whose name starts with "virtio-net-".
+ *
+ * Returns 0 when the entry is acceptable, -1 otherwise.
+ */
+static int net_vhost_check_net(QemuOpts *opts, void *opaque)
+{
+    static const char prefix[] = "virtio-net-";
+    const char *wanted = opaque;
+    const char *driver = qemu_opt_get(opts, "driver");
+    const char *netdev = qemu_opt_get(opts, "netdev");
+
+    if (!driver || !netdev) {
+        return 0;
+    }
+
+    if (strcmp(netdev, wanted) != 0) {
+        return 0;
+    }
+
+    if (strncmp(driver, prefix, sizeof(prefix) - 1) == 0) {
+        return 0;
+    }
+
+    error_report("vhost-user requires frontend driver virtio-net-*");
+    return -1;
+}
+
+/*
+ * Net client init function for the vhost-user backend.
+ *
+ * Resolves and validates the chardev named in the options, checks every
+ * "device" entry using netdev @name with net_vhost_check_net(), and then
+ * creates the client with net_vhost_user_init().
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int net_init_vhost_user(const NetClientOptions *opts, const char *name,
+                        NetClientState *peer)
+{
+    const NetdevVhostUserOptions *user_opts;
+    CharDriverState *chr;
+    bool force;
+
+    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+    user_opts = opts->vhost_user;
+
+    chr = net_vhost_parse_chardev(user_opts);
+    if (chr == NULL) {
+        error_report("No suitable chardev found");
+        return -1;
+    }
+
+    /* verify net frontend */
+    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
+                          (void *)name, true) == -1) {
+        return -1;
+    }
+
+    /* vhostforce for non-MSIX; defaults to false when not given */
+    force = user_opts->has_vhostforce && user_opts->vhostforce;
+
+    return net_vhost_user_init(peer, "vhost_user", name, chr, force);
+}
diff --git a/numa.c b/numa.c
new file mode 100644
index 0000000000..e471afe04a
--- /dev/null
+++ b/numa.c
@@ -0,0 +1,369 @@
+/*
+ * NUMA parameter parsing routines
+ *
+ * Copyright (c) 2014 Fujitsu Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "sysemu/sysemu.h"
+#include "exec/cpu-common.h"
+#include "qemu/bitmap.h"
+#include "qom/cpu.h"
+#include "qemu/error-report.h"
+#include "include/exec/cpu-common.h" /* for RAM_ADDR_FMT */
+#include "qapi-visit.h"
+#include "qapi/opts-visitor.h"
+#include "qapi/dealloc-visitor.h"
+#include "qapi/qmp/qerror.h"
+#include "hw/boards.h"
+#include "sysemu/hostmem.h"
+#include "qmp-commands.h"
+
+QemuOptsList qemu_numa_opts = { /* option group named "numa" */
+ .name = "numa",
+ .implied_opt_name = "type", /* NOTE(review): presumably a leading bare
+ * value is read as "type"; confirm against
+ * the QemuOpts implementation */
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head),
+ .desc = { { 0 } } /* validated with OptsVisitor */
+};
+
+static int have_memdevs = -1; /* -1 until numa_node_parse() sees its first
+ * node; afterwards that node's has_memdev
+ * value, which every later node must match */
+
+/*
+ * Apply one parsed "node" NUMA option to numa_info[].
+ *
+ * @node: the parsed options
+ * @opts: the raw option set, consulted for the textual form of "mem"
+ * @errp: set on failure; numa_info[] may already be partially updated
+ *
+ * The node number is @node->nodeid if given, else the current value of
+ * nb_numa_nodes.  mem= and memdev= are mutually exclusive, and memdev= must
+ * be used for either all nodes or none.
+ */
+static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
+{
+    uint16_t nodenr;
+    uint16List *cpus = NULL;
+
+    if (node->has_nodeid) {
+        nodenr = node->nodeid;
+    } else {
+        nodenr = nb_numa_nodes;
+    }
+
+    if (nodenr >= MAX_NODES) {
+        error_setg(errp, "Max number of NUMA nodes reached: %"
+                   PRIu16, nodenr);
+        return;
+    }
+
+    for (cpus = node->cpus; cpus; cpus = cpus->next) {
+        /*
+         * The per-node CPU bitmap is handled as MAX_CPUMASK_BITS bits wide
+         * elsewhere in this file, so valid indexes are
+         * 0 .. MAX_CPUMASK_BITS - 1.
+         */
+        if (cpus->value >= MAX_CPUMASK_BITS) {
+            error_setg(errp, "CPU number %" PRIu16 " is bigger than %d",
+                       cpus->value, MAX_CPUMASK_BITS - 1);
+            return;
+        }
+        bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
+    }
+
+    if (node->has_mem && node->has_memdev) {
+        error_setg(errp, "qemu: cannot specify both mem= and memdev=");
+        return;
+    }
+
+    /* The first node decides whether memdev= is in use for all nodes. */
+    if (have_memdevs == -1) {
+        have_memdevs = node->has_memdev;
+    }
+    if (node->has_memdev != have_memdevs) {
+        error_setg(errp, "qemu: memdev option must be specified for either "
+                   "all or no nodes");
+        return;
+    }
+
+    if (node->has_mem) {
+        uint64_t mem_size = node->mem;
+        const char *mem_str = qemu_opt_get(opts, "mem");
+        /* Fix up legacy suffix-less format: a bare number means megabytes */
+        if (g_ascii_isdigit(mem_str[strlen(mem_str) - 1])) {
+            mem_size <<= 20;
+        }
+        numa_info[nodenr].node_mem = mem_size;
+    }
+    if (node->has_memdev) {
+        Object *o;
+        o = object_resolve_path_type(node->memdev, TYPE_MEMORY_BACKEND, NULL);
+        if (!o) {
+            error_setg(errp, "memdev=%s is ambiguous", node->memdev);
+            return;
+        }
+
+        /* Keep the backend alive for as long as numa_info[] points at it. */
+        object_ref(o);
+        numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL);
+        numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
+    }
+}
+
+/*
+ * Parse one "numa" option set and apply it.
+ *
+ * @opts:   the option set to parse
+ * @opaque: unused
+ *
+ * Returns 0 on success, -1 after reporting the error otherwise.
+ *
+ * The parsed NumaOptions object is only needed while the option is being
+ * applied, so it is released on the success path as well as on failure.
+ */
+int numa_init_func(QemuOpts *opts, void *opaque)
+{
+    NumaOptions *object = NULL;
+    Error *err = NULL;
+    int ret = 0;
+
+    {
+        OptsVisitor *ov = opts_visitor_new(opts);
+        visit_type_NumaOptions(opts_get_visitor(ov), &object, NULL, &err);
+        opts_visitor_cleanup(ov);
+    }
+
+    if (err) {
+        goto out;
+    }
+
+    switch (object->kind) {
+    case NUMA_OPTIONS_KIND_NODE:
+        numa_node_parse(object->node, opts, &err);
+        if (err) {
+            goto out;
+        }
+        nb_numa_nodes++;
+        break;
+    default:
+        abort();
+    }
+
+out:
+    if (err) {
+        qerror_report_err(err);
+        error_free(err);
+        ret = -1;
+    }
+
+    if (object) {
+        QapiDeallocVisitor *dv = qapi_dealloc_visitor_new();
+        visit_type_NumaOptions(qapi_dealloc_get_visitor(dv),
+                               &object, NULL, NULL);
+        qapi_dealloc_visitor_cleanup(dv);
+    }
+
+    return ret;
+}
+
+/*
+ * Finalize the NUMA layout once all "numa" options have been parsed.
+ *
+ * Does nothing when no node was configured.  Otherwise: clamps the node
+ * count to MAX_NODES, spreads RAM across the nodes if none has a size,
+ * exits if the node sizes do not add up to ram_size, and assigns VCPUs
+ * round-robin if no node names any CPU.
+ */
+void set_numa_nodes(void)
+{
+    uint64_t numa_total = 0;
+    int i;
+
+    if (nb_numa_nodes <= 0) {
+        return;
+    }
+
+    if (nb_numa_nodes > MAX_NODES) {
+        nb_numa_nodes = MAX_NODES;
+    }
+
+    /* Find the first node with an explicit memory size, if any. */
+    i = 0;
+    while (i < nb_numa_nodes && numa_info[i].node_mem == 0) {
+        i++;
+    }
+
+    /* If no memory size is given for any node, assume the default case
+     * and distribute the available memory equally across all nodes.
+     */
+    if (i == nb_numa_nodes) {
+        uint64_t usedmem = 0;
+
+        /* On Linux, each node's border has to be 8MB aligned;
+         * the final node gets the rest.
+         */
+        for (i = 0; i < nb_numa_nodes - 1; i++) {
+            numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
+                                    ~((1 << 23UL) - 1);
+            usedmem += numa_info[i].node_mem;
+        }
+        numa_info[nb_numa_nodes - 1].node_mem = ram_size - usedmem;
+    }
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        numa_total += numa_info[i].node_mem;
+    }
+    if (numa_total != ram_size) {
+        error_report("total memory for NUMA nodes (%" PRIu64 ")"
+                     " should equal RAM size (" RAM_ADDR_FMT ")",
+                     numa_total, ram_size);
+        exit(1);
+    }
+
+    /* Find the first node with an explicit CPU assignment, if any. */
+    i = 0;
+    while (i < nb_numa_nodes &&
+           bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
+        i++;
+    }
+
+    /* Assigning the VCPUs round-robin is easier to implement; guest OSes
+     * must cope with this anyway, because there are BIOSes out there in
+     * real machines which also use this scheme.
+     */
+    if (i == nb_numa_nodes) {
+        for (i = 0; i < max_cpus; i++) {
+            set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
+        }
+    }
+}
+
+/*
+ * Record on every CPU the NUMA node whose CPU bitmap contains its index.
+ * If several nodes list the same CPU, the highest-numbered node wins,
+ * because the scan does not stop at the first match.
+ */
+void set_numa_modes(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        int node;
+
+        for (node = 0; node < nb_numa_nodes; node++) {
+            if (!test_bit(cpu->cpu_index, numa_info[node].node_cpu)) {
+                continue;
+            }
+            cpu->numa_node = node;
+        }
+    }
+}
+
+/*
+ * Allocate guest RAM as one region without per-node memory backends.
+ *
+ * With -mem-path the region is file-backed (Linux hosts only; other hosts
+ * exit).  If the file-backed allocation fails, the error is reported and
+ * ordinary RAM is used instead.  The region is then registered for
+ * migration.
+ */
+static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
+                                           const char *name,
+                                           uint64_t ram_size)
+{
+    if (!mem_path) {
+        memory_region_init_ram(mr, owner, name, ram_size);
+    } else {
+#ifdef __linux__
+        Error *local_err = NULL;
+
+        memory_region_init_ram_from_file(mr, owner, name, ram_size, false,
+                                         mem_path, &local_err);
+        if (local_err) {
+            /* Legacy behavior: fall back to regular RAM allocation. */
+            qerror_report_err(local_err);
+            error_free(local_err);
+            memory_region_init_ram(mr, owner, name, ram_size);
+        }
+#else
+        fprintf(stderr, "-mem-path not supported on this host\n");
+        exit(1);
+#endif
+    }
+
+    vmstate_register_ram_global(mr);
+}
+
+/*
+ * Set up the guest's system memory region @mr.
+ *
+ * Without NUMA nodes, or when the nodes do not use memdev=, this is a
+ * single allocation.  Otherwise @mr becomes a container and each node's
+ * backend memory is mapped into it back to back, in node order.
+ */
+void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
+                                          const char *name,
+                                          uint64_t ram_size)
+{
+    uint64_t offset = 0;
+    int node;
+
+    if (nb_numa_nodes == 0 || !have_memdevs) {
+        allocate_system_memory_nonnuma(mr, owner, name, ram_size);
+        return;
+    }
+
+    memory_region_init(mr, owner, name, ram_size);
+
+    for (node = 0; node < MAX_NODES; node++) {
+        HostMemoryBackend *backend = numa_info[node].node_memdev;
+        uint64_t node_size = numa_info[node].node_mem;
+        Error *local_err = NULL;
+        MemoryRegion *seg;
+
+        /* Slots without a backend do not take up address space. */
+        if (backend == NULL) {
+            continue;
+        }
+
+        seg = host_memory_backend_get_memory(backend, &local_err);
+        if (local_err) {
+            qerror_report_err(local_err);
+            exit(1);
+        }
+
+        memory_region_add_subregion(mr, offset, seg);
+        vmstate_register_ram_global(seg);
+        offset += node_size;
+    }
+}
+
+/*
+ * object_child_foreach() callback: if @obj is a memory backend, prepend a
+ * Memdev entry describing it to the list that @opaque points to.
+ *
+ * Returns 0 on success (including objects that are not memory backends),
+ * -1 if any property could not be read.  On failure nothing is added to
+ * the list and the partially built entry and the error object are freed.
+ */
+static int query_memdev(Object *obj, void *opaque)
+{
+    MemdevList **list = opaque;
+    MemdevList *m;
+    Error *err = NULL;
+
+    if (!object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+        return 0;
+    }
+
+    m = g_malloc0(sizeof(*m));
+    m->value = g_malloc0(sizeof(*m->value));
+
+    m->value->size = object_property_get_int(obj, "size", &err);
+    if (err) {
+        goto error;
+    }
+
+    m->value->merge = object_property_get_bool(obj, "merge", &err);
+    if (err) {
+        goto error;
+    }
+
+    m->value->dump = object_property_get_bool(obj, "dump", &err);
+    if (err) {
+        goto error;
+    }
+
+    m->value->prealloc = object_property_get_bool(obj, "prealloc", &err);
+    if (err) {
+        goto error;
+    }
+
+    m->value->policy = object_property_get_enum(obj, "policy",
+                                                HostMemPolicy_lookup,
+                                                &err);
+    if (err) {
+        goto error;
+    }
+
+    object_property_get_uint16List(obj, "host-nodes",
+                                   &m->value->host_nodes, &err);
+    if (err) {
+        goto error;
+    }
+
+    m->next = *list;
+    *list = m;
+
+    return 0;
+
+error:
+    /* The entry was never linked into *list, so it must be released here. */
+    error_free(err);
+    g_free(m->value);
+    g_free(m);
+    return -1;
+}
+
+/*
+ * QMP query-memdev: build a list describing every memory backend found
+ * below "/objects".
+ *
+ * Returns the list (possibly NULL when there is nothing to report).  If
+ * collecting any entry fails, everything gathered so far is freed and NULL
+ * is returned; @errp is not set by this function.
+ */
+MemdevList *qmp_query_memdev(Error **errp)
+{
+    Object *obj;
+    MemdevList *list = NULL, *m;
+
+    obj = object_resolve_path("/objects", NULL);
+    if (obj == NULL) {
+        return NULL;
+    }
+
+    if (object_child_foreach(obj, query_memdev, &list) != 0) {
+        goto error;
+    }
+
+    return list;
+
+error:
+    while (list) {
+        uint16List *node;
+
+        m = list;
+        list = list->next;
+
+        /* Entries already on the list own a complete host_nodes list. */
+        node = m->value->host_nodes;
+        while (node) {
+            uint16List *next = node->next;
+            g_free(node);
+            node = next;
+        }
+
+        g_free(m->value);
+        g_free(m);
+    }
+    return NULL;
+}
diff --git a/qapi-schema.json b/qapi-schema.json
index dc2abe479e..98350048f6 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2069,6 +2069,22 @@
'*devname': 'str' } }
##
+# @NetdevVhostUserOptions
+#
+# Vhost-user network backend
+#
+# @chardev: name of a unix socket chardev
+#
+# @vhostforce: #optional vhost on for non-MSIX virtio guests (default: false).
+#
+# Since 2.1
+##
+{ 'type': 'NetdevVhostUserOptions',
+ 'data': {
+ 'chardev': 'str',
+ '*vhostforce': 'bool' } }
+
+##
# @NetClientOptions
#
# A discriminated record of network device traits.
@@ -2086,7 +2102,8 @@
'dump': 'NetdevDumpOptions',
'bridge': 'NetdevBridgeOptions',
'hubport': 'NetdevHubPortOptions',
- 'netmap': 'NetdevNetmapOptions' } }
+ 'netmap': 'NetdevNetmapOptions',
+ 'vhost-user': 'NetdevVhostUserOptions' } }
##
# @NetLegacy
@@ -3080,3 +3097,192 @@
'btn' : 'InputBtnEvent',
'rel' : 'InputMoveEvent',
'abs' : 'InputMoveEvent' } }
+
+##
+# @NumaOptions
+#
+# A discriminated record of NUMA options. (for OptsVisitor)
+#
+# Since 2.1
+##
+{ 'union': 'NumaOptions',
+ 'data': {
+ 'node': 'NumaNodeOptions' }}
+
+##
+# @NumaNodeOptions
+#
+# Create a guest NUMA node. (for OptsVisitor)
+#
+# @nodeid: #optional NUMA node ID (increase by 1 from 0 if omitted)
+#
+# @cpus: #optional VCPUs belonging to this node (assign VCPUS round-robin
+# if omitted)
+#
+# @mem: #optional memory size of this node; mutually exclusive with @memdev.
+# Equally divide total memory among nodes if both @mem and @memdev are
+# omitted.
+#
+# @memdev: #optional memory backend object. If specified for one node,
+# it must be specified for all nodes.
+#
+# Since: 2.1
+##
+{ 'type': 'NumaNodeOptions',
+ 'data': {
+ '*nodeid': 'uint16',
+ '*cpus': ['uint16'],
+ '*mem': 'size',
+ '*memdev': 'str' }}
+
+##
+# @HostMemPolicy
+#
+# Host memory policy types
+#
+# @default: restore default policy, remove any nondefault policy
+#
+# @preferred: set the preferred host nodes for allocation
+#
+# @bind: a strict policy that restricts memory allocation to the
+# host nodes specified
+#
+# @interleave: memory allocations are interleaved across the set
+# of host nodes specified
+#
+# Since 2.1
+##
+{ 'enum': 'HostMemPolicy',
+ 'data': [ 'default', 'preferred', 'bind', 'interleave' ] }
+
+##
+# @Memdev:
+#
+# Information about a memory device
+#
+# @size: memory device size
+#
+# @merge: enables or disables memory merge support
+#
+# @dump: includes memory device's memory in a core dump or not
+#
+# @prealloc: enables or disables memory preallocation
+#
+# @host-nodes: host nodes for its memory policy
+#
+# @policy: memory policy of memory device
+#
+# Since: 2.1
+##
+
+{ 'type': 'Memdev',
+ 'data': {
+ 'size': 'size',
+ 'merge': 'bool',
+ 'dump': 'bool',
+ 'prealloc': 'bool',
+ 'host-nodes': ['uint16'],
+ 'policy': 'HostMemPolicy' }}
+
+##
+# @query-memdev:
+#
+# Returns information for all memory devices.
+#
+# Returns: a list of @Memdev.
+#
+# Since: 2.1
+##
+{ 'command': 'query-memdev', 'returns': ['Memdev'] }
+
+##
+# @PCDIMMDeviceInfo:
+#
+# PCDIMMDevice state information
+#
+# @id: #optional device's ID
+#
+# @addr: physical address, where device is mapped
+#
+# @size: size of memory that the device provides
+#
+# @slot: slot number at which device is plugged in
+#
+# @node: NUMA node number where device is plugged in
+#
+# @memdev: memory backend linked with device
+#
+# @hotplugged: true if device was hotplugged
+#
+# @hotpluggable: true if device could be added/removed while machine is running
+#
+# Since: 2.1
+##
+{ 'type': 'PCDIMMDeviceInfo',
+ 'data': { '*id': 'str',
+ 'addr': 'int',
+ 'size': 'int',
+ 'slot': 'int',
+ 'node': 'int',
+ 'memdev': 'str',
+ 'hotplugged': 'bool',
+ 'hotpluggable': 'bool'
+ }
+}
+
+##
+# @MemoryDeviceInfo:
+#
+# Union containing information about a memory device
+#
+# Since: 2.1
+##
+{ 'union': 'MemoryDeviceInfo', 'data': {'dimm': 'PCDIMMDeviceInfo'} }
+
+##
+# @query-memory-devices
+#
+# Lists available memory devices and their state
+#
+# Since: 2.1
+##
+{ 'command': 'query-memory-devices', 'returns': ['MemoryDeviceInfo'] }
+
+##
+# @ACPISlotType
+#
+# @DIMM: memory slot
+##
+{ 'enum': 'ACPISlotType', 'data': [ 'DIMM' ] }
+
+##
+# @ACPIOSTInfo
+#
+# OSPM Status Indication for a device
+# For description of possible values of @source and @status fields
+# see "_OST (OSPM Status Indication)" chapter of ACPI5.0 spec.
+#
+# @device: #optional device ID associated with slot
+#
+# @slot: slot ID, unique per slot of a given @slot-type
+#
+# @slot-type: type of the slot
+#
+# @source: an integer containing the source event
+#
+# @status: an integer containing the status code
+#
+# Since: 2.1
+##
+{ 'type': 'ACPIOSTInfo',
+ 'data' : { '*device': 'str',
+ 'slot': 'str',
+ 'slot-type': 'ACPISlotType',
+ 'source': 'int',
+ 'status': 'int' } }
+
+##
+# @query-acpi-ospm-status
+#
+# Lists ACPI OSPM status of ACPI device objects,
+# which might be reported via _OST method
+#
+# Since: 2.1
+##
+{ 'command': 'query-acpi-ospm-status', 'returns': ['ACPIOSTInfo'] }
diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c
index 5780944792..d8a8db02ed 100644
--- a/qapi/string-input-visitor.c
+++ b/qapi/string-input-visitor.c
@@ -15,31 +15,210 @@
#include "qapi/visitor-impl.h"
#include "qapi/qmp/qerror.h"
#include "qemu/option.h"
+#include "qemu/queue.h"
+#include "qemu/range.h"
+
struct StringInputVisitor
{
Visitor visitor;
+
+ bool head; /* true until next_list() has linked the first element;
+ * set back to true by end_list() */
+
+ GList *ranges; /* Range elements built by parse_str() from @string */
+ GList *cur_range; /* element of @ranges currently being walked */
+ int64_t cur; /* next integer to hand out from cur_range */
+
const char *string;
};
+/* g_list_foreach() callback: release one list element; @dummy is unused. */
+static void free_range(void *range, void *dummy)
+{
+    (void)dummy;
+    g_free(range);
+}
+
+/*
+ * Parse siv->string as a comma-separated list of integers and inclusive
+ * ranges ("A" or "A-B") into siv->ranges.
+ *
+ * Does nothing if the ranges were already parsed or if there is no input
+ * string.  A range must satisfy A <= B and span fewer than 65536 values.
+ * On any syntax error all ranges are discarded and siv->ranges is left
+ * NULL; callers detect failure that way.  @errp is deliberately not set
+ * here, because callers raise their own error.
+ */
+static void parse_str(StringInputVisitor *siv, Error **errp)
+{
+    const char *str = siv->string;
+    long long start, end;
+    Range *cur;
+    char *endptr;
+
+    if (siv->ranges) {
+        return;
+    }
+
+    /* start_list() gets here without checking for a missing string. */
+    if (!str) {
+        return;
+    }
+
+    do {
+        errno = 0;
+        start = strtoll(str, &endptr, 0);
+        if (errno != 0 || endptr == str) {
+            goto error;
+        }
+        end = start;
+
+        if (*endptr == '-') {
+            str = endptr + 1;
+            errno = 0;
+            end = strtoll(str, &endptr, 0);
+            if (errno != 0 || endptr == str || start > end) {
+                goto error;
+            }
+            /* Reject overly large ranges; the first test guards the
+             * addition in the second against overflow.
+             */
+            if (start <= INT64_MAX - 65536 && end >= start + 65536) {
+                goto error;
+            }
+        }
+
+        /* An item must be followed by ',' or by the end of the string. */
+        if (*endptr == '\0') {
+            str = NULL;
+        } else if (*endptr == ',') {
+            str = endptr + 1;
+        } else {
+            goto error;
+        }
+
+        /* Ranges are stored half-open: [begin, end). */
+        cur = g_malloc0(sizeof(*cur));
+        cur->begin = start;
+        cur->end = end + 1;
+        siv->ranges = g_list_insert_sorted_merged(siv->ranges, cur,
+                                                  range_compare);
+    } while (str);
+
+    return;
+error:
+    g_list_foreach(siv->ranges, free_range, NULL);
+    g_list_free(siv->ranges);
+    siv->ranges = NULL;
+}
+
+/*
+ * Visitor start_list hook: parse the input string and position the
+ * iteration state at the first value of the first range, if there is one.
+ */
+static void
+start_list(Visitor *v, const char *name, Error **errp)
+{
+    StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
+    Range *first;
+
+    parse_str(siv, errp);
+
+    siv->cur_range = g_list_first(siv->ranges);
+    if (siv->cur_range == NULL) {
+        return;
+    }
+
+    first = siv->cur_range->data;
+    if (first != NULL) {
+        siv->cur = first->begin;
+    }
+}
+
+/*
+ * Visitor next_list hook: allocate the next list link while values remain.
+ *
+ * When the current value has left the current range, iteration moves to
+ * the start of the following range.  Returns the newly allocated link, or
+ * NULL once all ranges are exhausted.
+ */
+static GenericList *
+next_list(Visitor *v, GenericList **list, Error **errp)
+{
+    StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
+    GenericList **link;
+    Range *range;
+
+    if (!(siv->ranges && siv->cur_range)) {
+        return NULL;
+    }
+
+    range = siv->cur_range->data;
+    if (range == NULL) {
+        return NULL;
+    }
+
+    if (!(siv->cur >= range->begin && siv->cur < range->end)) {
+        /* Current range is used up: advance to the next one. */
+        siv->cur_range = g_list_next(siv->cur_range);
+        if (siv->cur_range == NULL) {
+            return NULL;
+        }
+        range = siv->cur_range->data;
+        if (range == NULL) {
+            return NULL;
+        }
+        siv->cur = range->begin;
+    }
+
+    /* The first link goes into *list itself, later ones are chained on. */
+    if (siv->head) {
+        siv->head = false;
+        link = list;
+    } else {
+        link = &(*list)->next;
+    }
+
+    *link = g_malloc0(sizeof **link);
+    return *link;
+}
+
+/* Visitor end_list hook: make the next list traversal start at its head. */
+static void
+end_list(Visitor *v, Error **errp)
+{
+    StringInputVisitor *visitor_state =
+        DO_UPCAST(StringInputVisitor, visitor, v);
+
+    visitor_state->head = true;
+}
+
static void parse_type_int(Visitor *v, int64_t *obj, const char *name,
Error **errp)
{
StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v);
- char *endp = (char *) siv->string;
- long long val;
- errno = 0;
- if (siv->string) {
- val = strtoll(siv->string, &endp, 0);
- }
- if (!siv->string || errno || endp == siv->string || *endp) {
+ /*
+ * Store the next integer from the parsed ranges in *obj and advance.
+ * Outside a list traversal this yields the first value of the first
+ * range.  Sets @errp if there is no input or it cannot be parsed.
+ */
+ if (!siv->string) {
error_set(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
"integer");
return;
}
- *obj = val;
+ parse_str(siv, errp);
+
+ if (!siv->ranges) {
+ goto error;
+ }
+
+ if (!siv->cur_range) {
+ Range *r;
+
+ siv->cur_range = g_list_first(siv->ranges);
+ if (!siv->cur_range) {
+ goto error;
+ }
+
+ r = siv->cur_range->data;
+ if (!r) {
+ goto error;
+ }
+
+ siv->cur = r->begin;
+ }
+
+ *obj = siv->cur;
+ siv->cur++;
+ return;
+
+error:
+ /* @name may be NULL; use the same placeholder as the check above */
+ error_set(errp, QERR_INVALID_PARAMETER_VALUE, name ? name : "null",
+ "an int64 value or range");
}
static void parse_type_size(Visitor *v, uint64_t *obj, const char *name,
@@ -140,6 +319,8 @@ Visitor *string_input_get_visitor(StringInputVisitor *v)
void string_input_visitor_cleanup(StringInputVisitor *v)
{
+ g_list_foreach(v->ranges, free_range, NULL); /* free every Range element */
+ g_list_free(v->ranges); /* then the list nodes that held them */
g_free(v);
}
@@ -155,8 +336,12 @@ StringInputVisitor *string_input_visitor_new(const char *str)
v->visitor.type_bool = parse_type_bool;
v->visitor.type_str = parse_type_str;
v->visitor.type_number = parse_type_number;
+ v->visitor.start_list = start_list;
+ v->visitor.next_list = next_list;
+ v->visitor.end_list = end_list;
v->visitor.optional = parse_optional;
v->string = str;
+ v->head = true;
return v;
}
diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c
index fb1d2e806d..e9aca3bfdc 100644
--- a/qapi/string-output-visitor.c
+++ b/qapi/string-output-visitor.c
@@ -16,32 +16,181 @@
#include "qapi/qmp/qerror.h"
#include "qemu/host-utils.h"
#include <math.h>
+#include "qemu/range.h"
+
+enum ListMode {
+ LM_NONE, /* not traversing a list; also selected by next_list()
+ * when the list has a single element, so that
+ * print_type_int() formats the value right away */
+ LM_STARTED, /* start_list() has been called; the first integer
+ * printed opens a run and moves to LM_IN_PROGRESS */
+
+ LM_IN_PROGRESS, /* A run of consecutive integers is being collected
+ * in range_start/range_end.
+ *
+ * print_type_int() extends the run when the new
+ * value equals range_end + 1; otherwise it stores
+ * the finished run in the ranges list and opens a
+ * new run at the new value. No output text is
+ * produced in this state.
+ */
+
+ LM_SIGNED_INTERVAL, /* Not referenced by the code visible in this part
+ * of the file.
+ *
+ * NOTE(review): this and LM_UNSIGNED_INTERVAL look
+ * like leftovers from another visitor's list-mode
+ * enum; confirm whether they can be dropped.
+ */
+
+ LM_UNSIGNED_INTERVAL,/* See LM_SIGNED_INTERVAL. */
+
+ LM_END /* set by next_list() when it returns the last
+ * element; print_type_int() then stores the final
+ * run and formats all ranges into the output */
+};
+
+typedef enum ListMode ListMode;
struct StringOutputVisitor
{
Visitor visitor;
bool human;
- char *string;
+ GString *string; /* output text; handed out by string_output_get_string() */
+ bool head; /* true until next_list() has returned the first element */
+ ListMode list_mode; /* list traversal state, see enum ListMode */
+ union {
+ int64_t s;
+ uint64_t u;
+ } range_start, range_end; /* bounds of the run of consecutive integers
+ * collected by print_type_int(); only .s is
+ * used by the code visible here */
+ GList *ranges; /* Range elements queued for formatting */
};
static void string_output_set(StringOutputVisitor *sov, char *string)
{
- g_free(sov->string);
- sov->string = string;
+ if (sov->string) { /* drop any previous output, character data included */
+ g_string_free(sov->string, true);
+ }
+ sov->string = g_string_new(string); /* new GString initialised from @string */
+ g_free(string); /* @string is consumed: the caller must not use it again */
+}
+
+/* Queue the single value @a for output as the half-open range [a, a + 1). */
+static void string_output_append(StringOutputVisitor *sov, int64_t a)
+{
+    Range *single = g_malloc0(sizeof(Range));
+
+    single->begin = a;
+    single->end = a + 1;
+    sov->ranges = g_list_insert_sorted_merged(sov->ranges, single,
+                                              range_compare);
+}
+
+/* Queue the inclusive interval @s..@e for output, stored as [s, e + 1). */
+static void string_output_append_range(StringOutputVisitor *sov,
+                                       int64_t s, int64_t e)
+{
+    Range *span = g_malloc0(sizeof(Range));
+
+    span->begin = s;
+    span->end = e + 1;
+    sov->ranges = g_list_insert_sorted_merged(sov->ranges, span,
+                                              range_compare);
+}
+
+/*
+ * Append the text form of range @r to sov->string.
+ *
+ * A range covering more than one value is written as "first-last", a
+ * single value as just that value.  @human selects hexadecimal output
+ * (with a leading "0x") instead of decimal.  A "," is appended when @next
+ * says that more ranges follow.
+ */
+static void format_string(StringOutputVisitor *sov, Range *r, bool next,
+                          bool human)
+{
+    bool is_span = r->end - r->begin > 1;
+
+    if (is_span && human) {
+        g_string_append_printf(sov->string, "0x%" PRIx64 "-%" PRIx64,
+                               r->begin, r->end - 1);
+    } else if (is_span) {
+        g_string_append_printf(sov->string, "%" PRId64 "-%" PRId64,
+                               r->begin, r->end - 1);
+    } else if (human) {
+        g_string_append_printf(sov->string, "0x%" PRIx64, r->begin);
+    } else {
+        g_string_append_printf(sov->string, "%" PRId64, r->begin);
+    }
+
+    if (next) {
+        g_string_append(sov->string, ",");
+    }
+}
static void print_type_int(Visitor *v, int64_t *obj, const char *name,
Error **errp)
{
StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v);
- char *out;
+ GList *l;
+
+ switch (sov->list_mode) {
+ case LM_NONE: /* not in a list (or single-element list): queue and format now */
+ string_output_append(sov, *obj);
+ break;
+
+ case LM_STARTED: /* first list element: open a run, emit nothing yet */
+ sov->range_start.s = *obj;
+ sov->range_end.s = *obj;
+ sov->list_mode = LM_IN_PROGRESS;
+ return;
+
+ case LM_IN_PROGRESS:
+ if (sov->range_end.s + 1 == *obj) { /* value continues the current run */
+ sov->range_end.s++;
+ } else { /* run is broken: queue it, then open a new run at *obj */
+ if (sov->range_start.s == sov->range_end.s) {
+ string_output_append(sov, sov->range_end.s);
+ } else {
+ assert(sov->range_start.s < sov->range_end.s);
+ string_output_append_range(sov, sov->range_start.s,
+ sov->range_end.s);
+ }
+
+ sov->range_start.s = *obj;
+ sov->range_end.s = *obj;
+ }
+ return;
+
+ case LM_END: /* last list element: queue what is pending, then format below */
+ if (sov->range_end.s + 1 == *obj) {
+ sov->range_end.s++;
+ assert(sov->range_start.s < sov->range_end.s);
+ string_output_append_range(sov, sov->range_start.s,
+ sov->range_end.s);
+ } else {
+ if (sov->range_start.s == sov->range_end.s) {
+ string_output_append(sov, sov->range_end.s);
+ } else {
+ assert(sov->range_start.s < sov->range_end.s);
+
+ string_output_append_range(sov, sov->range_start.s,
+ sov->range_end.s);
+ }
+ string_output_append(sov, *obj);
+ }
+ break;
+
+ default:
+ abort();
+ }
+
+ l = sov->ranges; /* decimal rendering of every queued range */
+ while (l) {
+ Range *r = l->data;
+ format_string(sov, r, l->next != NULL, false);
+ l = l->next;
+ }
if (sov->human) {
- out = g_strdup_printf("%lld (%#llx)", (long long) *obj, (long long) *obj);
- } else {
- out = g_strdup_printf("%lld", (long long) *obj);
+ l = sov->ranges; /* human mode repeats the ranges in hex inside " (...)" */
+ g_string_append(sov->string, " (");
+ while (l) {
+ Range *r = l->data;
+ format_string(sov, r, l->next != NULL, true);
+ l = l->next;
+ }
+ g_string_append(sov->string, ")");
}
- string_output_set(sov, out);
}
static void print_type_size(Visitor *v, uint64_t *obj, const char *name,
@@ -103,9 +252,61 @@ static void print_type_number(Visitor *v, double *obj, const char *name,
string_output_set(sov, g_strdup_printf("%f", *obj));
}
+/*
+ * Visitor start_list hook: enter list mode.  Nested lists are not
+ * supported, so the visitor must not already be inside one.
+ */
+static void
+start_list(Visitor *v, const char *name, Error **errp)
+{
+    StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v);
+
+    /* we can't traverse a list in a list */
+    assert(sov->list_mode == LM_NONE);
+
+    sov->head = true;
+    sov->list_mode = LM_STARTED;
+}
+
+/*
+ * Visitor next_list hook: return the element to visit next, or NULL.
+ *
+ * The first call returns *list itself, later calls return (*list)->next.
+ * When the returned element is the last one, the list mode is switched so
+ * that print_type_int() produces the output: LM_NONE if the list has only
+ * that one element, LM_END otherwise.
+ */
+static GenericList *
+next_list(Visitor *v, GenericList **list, Error **errp)
+{
+    StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v);
+    GenericList *elem;
+
+    if (*list == NULL) {
+        return NULL;
+    }
+
+    if (sov->head) {
+        elem = *list;
+        if (elem->next == NULL) {
+            sov->list_mode = LM_NONE;
+        }
+        sov->head = false;
+        return elem;
+    }
+
+    elem = (*list)->next;
+    if (elem != NULL && elem->next == NULL) {
+        sov->list_mode = LM_END;
+    }
+    return elem;
+}
+
+/* Visitor end_list hook: leave list mode and reset the head marker. */
+static void
+end_list(Visitor *v, Error **errp)
+{
+    StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v);
+
+    assert(sov->list_mode == LM_STARTED ||
+           sov->list_mode == LM_END ||
+           sov->list_mode == LM_NONE ||
+           sov->list_mode == LM_IN_PROGRESS);
+
+    sov->head = true;
+    sov->list_mode = LM_NONE;
+}
+
char *string_output_get_string(StringOutputVisitor *sov)
{
- char *string = sov->string;
+ char *string = g_string_free(sov->string, false); /* free only the GString
+ * wrapper; the text is
+ * returned to the caller */
sov->string = NULL;
return string;
}
@@ -115,9 +316,19 @@ Visitor *string_output_get_visitor(StringOutputVisitor *sov)
return &sov->visitor;
}
+/* g_list_foreach() callback: release one list element; @dummy is unused. */
+static void free_range(void *range, void *dummy)
+{
+    (void)dummy;
+    g_free(range);
+}
+
void string_output_visitor_cleanup(StringOutputVisitor *sov)
{
- g_free(sov->string);
+ if (sov->string) { /* NULL once string_output_get_string() has taken the text */
+ g_string_free(sov->string, true);
+ }
+
+ g_list_foreach(sov->ranges, free_range, NULL); /* free every Range element */
+ g_list_free(sov->ranges); /* then the list nodes */
g_free(sov);
}
@@ -127,6 +338,7 @@ StringOutputVisitor *string_output_visitor_new(bool human)
v = g_malloc0(sizeof(*v));
+ v->string = g_string_new(NULL);
v->human = human;
v->visitor.type_enum = output_type_enum;
v->visitor.type_int = print_type_int;
@@ -134,6 +346,9 @@ StringOutputVisitor *string_output_visitor_new(bool human)
v->visitor.type_bool = print_type_bool;
v->visitor.type_str = print_type_str;
v->visitor.type_number = print_type_number;
+ v->visitor.start_list = start_list;
+ v->visitor.next_list = next_list;
+ v->visitor.end_list = end_list;
return v;
}
diff --git a/qemu-char.c b/qemu-char.c
index f918f90972..b3bd3b5af4 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -84,6 +84,7 @@
#include "ui/qemu-spice.h"
#define READ_BUF_LEN 4096
+#define READ_RETRIES 10
/***********************************************************/
/* character device */
@@ -145,6 +146,41 @@ int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len)
return offset;
}
+/*
+ * Synchronously read up to @len bytes from the backend of @s into @buf.
+ *
+ * Each backend read is retried, sleeping briefly, for as long as it fails
+ * with EAGAIN.  The loop ends when @len bytes have arrived, when the
+ * backend reports 0 bytes, or when the budget of READ_RETRIES extra
+ * partial reads is used up.
+ *
+ * Returns the number of bytes read (possibly fewer than @len), 0 if the
+ * backend has no synchronous read support, or the backend's negative
+ * result on a read error other than EAGAIN.
+ */
+int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len)
+{
+    int offset = 0, counter = READ_RETRIES;
+    int res;
+
+    if (!s->chr_sync_read) {
+        return 0;
+    }
+
+    while (offset < len) {
+        do {
+            res = s->chr_sync_read(s, buf + offset, len - offset);
+            if (res == -1 && errno == EAGAIN) {
+                g_usleep(100);
+            }
+        } while (res == -1 && errno == EAGAIN);
+
+        if (res == 0) {
+            break;
+        }
+
+        if (res < 0) {
+            return res;
+        }
+
+        offset += res;
+
+        if (!counter--) {
+            break;
+        }
+    }
+
+    return offset;
+}
+
int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg)
{
if (!s->chr_ioctl)
@@ -168,7 +204,18 @@ void qemu_chr_be_write(CharDriverState *s, uint8_t *buf, int len)
int qemu_chr_fe_get_msgfd(CharDriverState *s)
{
- return s->get_msgfd ? s->get_msgfd(s) : -1;
+ int fd;
+ /*
+ * The backend may report 0 descriptors without writing to fd, so only
+ * a result of exactly 1 means that fd holds a received descriptor.
+ */
+ return (qemu_chr_fe_get_msgfds(s, &fd, 1) == 1) ? fd : -1;
+}
+
+/*
+ * Fetch up to @len received file descriptors into @fds through the
+ * backend's get_msgfds hook.  Returns the hook's result, or -1 if the
+ * backend has no such hook.
+ */
+int qemu_chr_fe_get_msgfds(CharDriverState *s, int *fds, int len)
+{
+    if (!s->get_msgfds) {
+        return -1;
+    }
+    return s->get_msgfds(s, fds, len);
+}
+
+/*
+ * Hand @num file descriptors from @fds to the backend's set_msgfds hook.
+ * Returns the hook's result, or -1 if the backend has no such hook.
+ */
+int qemu_chr_fe_set_msgfds(CharDriverState *s, int *fds, int num)
+{
+    if (!s->set_msgfds) {
+        return -1;
+    }
+    return s->set_msgfds(s, fds, num);
}
int qemu_chr_add_client(CharDriverState *s, int fd)
@@ -2296,16 +2343,73 @@ typedef struct {
int do_telnetopt;
int do_nodelay;
int is_unix;
- int msgfd;
+ int *read_msgfds;
+ int read_msgfds_num;
+ int *write_msgfds;
+ int write_msgfds_num;
} TCPCharDriver;
static gboolean tcp_chr_accept(GIOChannel *chan, GIOCondition cond, void *opaque);
+#ifndef _WIN32
+/*
+ * Send @len bytes from @buf over the socket, attaching the pending
+ * write_msgfds as an SCM_RIGHTS control message.
+ *
+ * The pending descriptor array is released afterwards whether or not the
+ * send succeeded.  Returns the result of sendmsg().
+ */
+static int unix_send_msgfds(CharDriverState *chr, const uint8_t *buf, int len)
+{
+    TCPCharDriver *s = chr->opaque;
+    struct msghdr msg;
+    struct iovec payload;
+    int ret;
+
+    size_t fd_size = s->write_msgfds_num * sizeof(int);
+    char control[CMSG_SPACE(fd_size)];
+    struct cmsghdr *cmsg;
+
+    memset(&msg, 0, sizeof(msg));
+    memset(control, 0, sizeof(control));
+
+    /* set the payload */
+    payload.iov_base = (uint8_t *) buf;
+    payload.iov_len = len;
+    msg.msg_iov = &payload;
+    msg.msg_iovlen = 1;
+
+    /* attach the descriptors as ancillary data */
+    msg.msg_control = control;
+    msg.msg_controllen = sizeof(control);
+
+    cmsg = CMSG_FIRSTHDR(&msg);
+    cmsg->cmsg_len = CMSG_LEN(fd_size);
+    cmsg->cmsg_level = SOL_SOCKET;
+    cmsg->cmsg_type = SCM_RIGHTS;
+    memcpy(CMSG_DATA(cmsg), s->write_msgfds, fd_size);
+
+    /* retry when interrupted by a signal */
+    do {
+        ret = sendmsg(s->fd, &msg, 0);
+    } while (ret < 0 && errno == EINTR);
+
+    /* free the written msgfds, no matter what */
+    if (s->write_msgfds_num) {
+        g_free(s->write_msgfds);
+        s->write_msgfds = NULL;
+        s->write_msgfds_num = 0;
+    }
+
+    return ret;
+}
+#endif
+
static int tcp_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
{
TCPCharDriver *s = chr->opaque;
if (s->connected) {
- return io_channel_send(s->chan, buf, len);
+#ifndef _WIN32
+ if (s->is_unix && s->write_msgfds_num) {
+ return unix_send_msgfds(chr, buf, len);
+ } else
+#endif
+ {
+ return io_channel_send(s->chan, buf, len);
+ }
} else {
/* XXX: indicate an error ? */
return len;
@@ -2372,12 +2476,39 @@ static void tcp_chr_process_IAC_bytes(CharDriverState *chr,
*size = j;
}
-static int tcp_get_msgfd(CharDriverState *chr)
+/*
+ * Copy up to @num received file descriptors into @fds and drop the pending
+ * array.  Descriptors that do not fit into @fds are closed rather than
+ * leaked.  Returns the number of descriptors copied.
+ */
+static int tcp_get_msgfds(CharDriverState *chr, int *fds, int num)
{
TCPCharDriver *s = chr->opaque;
- int fd = s->msgfd;
- s->msgfd = -1;
- return fd;
+ int to_copy = (s->read_msgfds_num < num) ? s->read_msgfds_num : num;
+
+ if (to_copy) {
+ int i;
+
+ memcpy(fds, s->read_msgfds, to_copy * sizeof(int));
+
+ /* close the descriptors the caller did not ask for */
+ for (i = to_copy; i < s->read_msgfds_num; i++) {
+ close(s->read_msgfds[i]);
+ }
+
+ g_free(s->read_msgfds);
+ s->read_msgfds = 0;
+ s->read_msgfds_num = 0;
+ }
+
+ return to_copy;
+}
+
+/*
+ * Replace the descriptors to be sent with the next write by a copy of the
+ * @num descriptors in @fds.  With @num == 0 the pending set is cleared.
+ * Always returns 0.
+ */
+static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num)
+{
+    TCPCharDriver *s = chr->opaque;
+
+    /* Clear the old pending fd array.  The pointer must be reset too:
+     * with num == 0 nothing replaces it, and a stale pointer would be
+     * freed again by the next call.
+     */
+    g_free(s->write_msgfds);
+    s->write_msgfds = NULL;
+
+    if (num) {
+        s->write_msgfds = g_malloc(num * sizeof(int));
+        memcpy(s->write_msgfds, fds, num * sizeof(int));
+    }
+
+    s->write_msgfds_num = num;
+
+    return 0;
}
#ifndef _WIN32
@@ -2387,26 +2518,46 @@ static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg)
struct cmsghdr *cmsg;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
- int fd;
+ int fd_size, i;
- if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)) ||
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) ||
cmsg->cmsg_level != SOL_SOCKET ||
- cmsg->cmsg_type != SCM_RIGHTS)
+ cmsg->cmsg_type != SCM_RIGHTS) {
continue;
+ }
+
+ fd_size = cmsg->cmsg_len - CMSG_LEN(0);
- fd = *((int *)CMSG_DATA(cmsg));
- if (fd < 0)
+ if (!fd_size) {
continue;
+ }
+
+ /* close and clean read_msgfds */
+ for (i = 0; i < s->read_msgfds_num; i++) {
+ close(s->read_msgfds[i]);
+ }
+
+ if (s->read_msgfds_num) {
+ g_free(s->read_msgfds);
+ }
- /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
- qemu_set_block(fd);
+ s->read_msgfds_num = fd_size / sizeof(int);
+ s->read_msgfds = g_malloc(fd_size);
+ memcpy(s->read_msgfds, CMSG_DATA(cmsg), fd_size);
-#ifndef MSG_CMSG_CLOEXEC
- qemu_set_cloexec(fd);
-#endif
- if (s->msgfd != -1)
- close(s->msgfd);
- s->msgfd = fd;
+ for (i = 0; i < s->read_msgfds_num; i++) {
+ int fd = s->read_msgfds[i];
+ if (fd < 0) {
+ continue;
+ }
+
+ /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
+ qemu_set_block(fd);
+
+ #ifndef MSG_CMSG_CLOEXEC
+ qemu_set_cloexec(fd);
+ #endif
+ }
}
}
@@ -2454,6 +2605,23 @@ static GSource *tcp_chr_add_watch(CharDriverState *chr, GIOCondition cond)
return g_io_create_watch(s->chan, cond);
}
+static void tcp_chr_disconnect(CharDriverState *chr)
+{ /* Tear down the current connection of a socket chardev and send
+ * CHR_EVENT_CLOSED through qemu_chr_be_event(). */
+ TCPCharDriver *s = chr->opaque;
+
+ s->connected = 0;
+ if (s->listen_chan) { /* listening socket: watch it again with tcp_chr_accept */
+ s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN,
+ tcp_chr_accept, chr);
+ }
+ remove_fd_in_watch(chr);
+ g_io_channel_unref(s->chan); /* drop the connection's channel ... */
+ s->chan = NULL;
+ closesocket(s->fd); /* ... and its socket */
+ s->fd = -1;
+ qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+}
+
static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
{
CharDriverState *chr = opaque;
@@ -2470,16 +2638,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
size = tcp_chr_recv(chr, (void *)buf, len);
if (size == 0) {
/* connection closed */
- s->connected = 0;
- if (s->listen_chan) {
- s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, tcp_chr_accept, chr);
- }
- remove_fd_in_watch(chr);
- g_io_channel_unref(s->chan);
- s->chan = NULL;
- closesocket(s->fd);
- s->fd = -1;
- qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+ tcp_chr_disconnect(chr);
} else if (size > 0) {
if (s->do_telnetopt)
tcp_chr_process_IAC_bytes(chr, s, buf, &size);
@@ -2490,6 +2649,24 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
return TRUE;
}
+static int tcp_chr_sync_read(CharDriverState *chr, const uint8_t *buf, int len)
+{
+ TCPCharDriver *s = chr->opaque;
+ int size;
+
+ if (!s->connected) {
+ return 0;
+ }
+
+ size = tcp_chr_recv(chr, (void *) buf, len);
+ if (size == 0) {
+ /* connection closed */
+ tcp_chr_disconnect(chr);
+ }
+
+ return size;
+}
+
#ifndef _WIN32
CharDriverState *qemu_chr_open_eventfd(int eventfd)
{
@@ -2503,6 +2680,25 @@ CharDriverState *qemu_chr_open_eventfd(int eventfd)
}
#endif
+static gboolean tcp_chr_chan_close(GIOChannel *channel, GIOCondition cond,
+ void *opaque)
+{
+ CharDriverState *chr = opaque;
+
+ if (cond != G_IO_HUP) {
+ return FALSE;
+ }
+
+ /* connection closed */
+ tcp_chr_disconnect(chr);
+ if (chr->fd_hup_tag) {
+ g_source_remove(chr->fd_hup_tag);
+ chr->fd_hup_tag = 0;
+ }
+
+ return TRUE;
+}
+
static void tcp_chr_connect(void *opaque)
{
CharDriverState *chr = opaque;
@@ -2512,6 +2708,8 @@ static void tcp_chr_connect(void *opaque)
if (s->chan) {
chr->fd_in_tag = io_add_watch_poll(s->chan, tcp_chr_read_poll,
tcp_chr_read, chr);
+ chr->fd_hup_tag = g_io_add_watch(s->chan, G_IO_HUP, tcp_chr_chan_close,
+ chr);
}
qemu_chr_be_generic_open(chr);
}
@@ -2604,6 +2802,7 @@ static gboolean tcp_chr_accept(GIOChannel *channel, GIOCondition cond, void *opa
static void tcp_chr_close(CharDriverState *chr)
{
TCPCharDriver *s = chr->opaque;
+ int i;
if (s->fd >= 0) {
remove_fd_in_watch(chr);
if (s->chan) {
@@ -2621,6 +2820,15 @@ static void tcp_chr_close(CharDriverState *chr)
}
closesocket(s->listen_fd);
}
+ if (s->read_msgfds_num) {
+ for (i = 0; i < s->read_msgfds_num; i++) {
+ close(s->read_msgfds[i]);
+ }
+ g_free(s->read_msgfds);
+ }
+ if (s->write_msgfds_num) {
+ g_free(s->write_msgfds);
+ }
g_free(s);
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
}
@@ -2649,7 +2857,10 @@ static CharDriverState *qemu_chr_open_socket_fd(int fd, bool do_nodelay,
s->connected = 0;
s->fd = -1;
s->listen_fd = -1;
- s->msgfd = -1;
+ s->read_msgfds = 0;
+ s->read_msgfds_num = 0;
+ s->write_msgfds = 0;
+ s->write_msgfds_num = 0;
chr->filename = g_malloc(256);
switch (ss.ss_family) {
@@ -2678,8 +2889,10 @@ static CharDriverState *qemu_chr_open_socket_fd(int fd, bool do_nodelay,
chr->opaque = s;
chr->chr_write = tcp_chr_write;
+ chr->chr_sync_read = tcp_chr_sync_read;
chr->chr_close = tcp_chr_close;
- chr->get_msgfd = tcp_get_msgfd;
+ chr->get_msgfds = tcp_get_msgfds;
+ chr->set_msgfds = tcp_set_msgfds;
chr->chr_add_client = tcp_chr_add_client;
chr->chr_add_watch = tcp_chr_add_watch;
chr->chr_update_read_handler = tcp_chr_update_read_handler;
diff --git a/qemu-options.hx b/qemu-options.hx
index d0714c43a6..ca75760b27 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -95,12 +95,22 @@ specifies the maximum number of hotpluggable CPUs.
ETEXI
DEF("numa", HAS_ARG, QEMU_OPTION_numa,
- "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
+ "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n"
+ "-numa node[,memdev=id][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
STEXI
-@item -numa @var{opts}
+@item -numa node[,mem=@var{size}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}]
+@item -numa node[,memdev=@var{id}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}]
@findex -numa
-Simulate a multi node NUMA system. If mem and cpus are omitted, resources
-are split equally.
+Simulate a multi node NUMA system. If @samp{mem}, @samp{memdev}
+and @samp{cpus} are omitted, resources are split equally. Also, note
+that the @option{-numa} option doesn't allocate any of the specified
+resources. That is, it just assigns existing resources to NUMA nodes. This
+means that one still has to use the @option{-m}, @option{-smp} options
+to allocate RAM and VCPUs respectively, and possibly @option{-object}
+to specify the memory backend for the @samp{memdev} suboption.
+
+@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, if one
+node uses @samp{memdev}, all of them have to use it.
ETEXI
DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,
@@ -210,17 +220,20 @@ use is discouraged as it may be removed from future versions.
ETEXI
DEF("m", HAS_ARG, QEMU_OPTION_m,
- "-m [size=]megs\n"
+ "-m[emory] [size=]megs[,slots=n,maxmem=size]\n"
" configure guest RAM\n"
" size: initial amount of guest memory (default: "
- stringify(DEFAULT_RAM_SIZE) "MiB)\n",
+ stringify(DEFAULT_RAM_SIZE) "MiB)\n"
+ " slots: number of hotplug slots (default: none)\n"
+ " maxmem: maximum amount of guest memory (default: none)\n",
QEMU_ARCH_ALL)
STEXI
@item -m [size=]@var{megs}
@findex -m
Set virtual RAM size to @var{megs} megabytes. Default is 128 MiB. Optionally,
a suffix of ``M'' or ``G'' can be used to signify a value in megabytes or
-gigabytes respectively.
+gigabytes respectively. The optional pair @var{slots}, @var{maxmem} can be used
+to set the number of hotpluggable memory slots and the maximum amount of memory.
ETEXI
DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
@@ -1457,6 +1470,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
#ifdef CONFIG_NETMAP
"netmap|"
#endif
+ "vhost-user|"
"socket|"
"hubport],id=str[,option][,option][,...]\n", QEMU_ARCH_ALL)
STEXI
@@ -1788,6 +1802,23 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
netdev. @code{-net} and @code{-device} with parameter @option{vlan} create the
required hub automatically.
+@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
+
+Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
+be a unix domain socket backed one. The vhost-user uses a specifically defined
+protocol to pass vhost ioctl replacement messages to an application on the other
+end of the socket. On non-MSIX guests, the feature can be forced with
+@var{vhostforce}.
+
+Example:
+@example
+qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
+ -numa node,memdev=mem \
+ -chardev socket,id=chr0,path=/path/to/socket \
+ -netdev type=vhost-user,id=net0,chardev=chr0 \
+ -device virtio-net-pci,netdev=net0
+@end example
+
@item -net dump[,vlan=@var{n}][,file=@var{file}][,len=@var{len}]
Dump network traffic on VLAN @var{n} to file @var{file} (@file{qemu-vlan0.pcap} by default).
At most @var{len} bytes (64k by default) per packet are stored. The file format is
diff --git a/qmp-commands.hx b/qmp-commands.hx
index d6bb0f483f..e4a1c80434 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -3571,6 +3571,93 @@ Example:
"format":"qcow2",
"virtual-size":2048000
}
- } } ] }
+ } } ] }
EQMP
+
+ {
+ .name = "query-memdev",
+ .args_type = "",
+ .mhandler.cmd_new = qmp_marshal_input_query_memdev,
+ },
+
+SQMP
+query-memdev
+------------
+
+Show memory devices information.
+
+
+Example (1):
+
+-> { "execute": "query-memdev" }
+<- { "return": [
+ {
+ "size": 536870912,
+ "merge": false,
+ "dump": true,
+ "prealloc": false,
+ "host-nodes": [0, 1],
+ "policy": "bind"
+ },
+ {
+ "size": 536870912,
+ "merge": false,
+ "dump": true,
+ "prealloc": true,
+ "host-nodes": [2, 3],
+ "policy": "preferred"
+ }
+ ]
+ }
+
+EQMP
+
+ {
+ .name = "query-memory-devices",
+ .args_type = "",
+ .mhandler.cmd_new = qmp_marshal_input_query_memory_devices,
+ },
+
+SQMP
+@query-memory-devices
+--------------------
+
+Return a list of memory devices.
+
+Example:
+-> { "execute": "query-memory-devices" }
+<- { "return": [ { "data":
+ { "addr": 5368709120,
+ "hotpluggable": true,
+ "hotplugged": true,
+ "id": "d1",
+ "memdev": "/objects/memX",
+ "node": 0,
+ "size": 1073741824,
+ "slot": 0},
+ "type": "dimm"
+ } ] }
+EQMP
+
+ {
+ .name = "query-acpi-ospm-status",
+ .args_type = "",
+ .mhandler.cmd_new = qmp_marshal_input_query_acpi_ospm_status,
+ },
+
+SQMP
+@query-acpi-ospm-status
+--------------------
+
+Return list of ACPIOSTInfo for devices that support status reporting
+via ACPI _OST method.
+
+Example:
+-> { "execute": "query-acpi-ospm-status" }
+<- { "return": [ { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0},
+ { "slot": "1", "slot-type": "DIMM", "source": 0, "status": 0},
+ { "slot": "2", "slot-type": "DIMM", "source": 0, "status": 0},
+ { "slot": "3", "slot-type": "DIMM", "source": 0, "status": 0}
+ ]}
+EQMP
diff --git a/qmp.c b/qmp.c
index a7f432b37e..dca6efb7b8 100644
--- a/qmp.c
+++ b/qmp.c
@@ -28,6 +28,8 @@
#include "qapi/qmp-input-visitor.h"
#include "hw/boards.h"
#include "qom/object_interfaces.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/acpi/acpi_dev_interface.h"
NameInfo *qmp_query_name(Error **errp)
{
@@ -540,7 +542,7 @@ void object_add(const char *type, const char *id, const QDict *qdict,
klass = object_class_by_name(type);
if (!klass) {
- error_setg(errp, "invalid class name");
+ error_setg(errp, "invalid object type: %s", type);
return;
}
@@ -565,13 +567,18 @@ void object_add(const char *type, const char *id, const QDict *qdict,
}
}
- user_creatable_complete(obj, &local_err);
+ object_property_add_child(container_get(object_get_root(), "/objects"),
+ id, obj, &local_err);
if (local_err) {
goto out;
}
- object_property_add_child(container_get(object_get_root(), "/objects"),
- id, obj, &local_err);
+ user_creatable_complete(obj, &local_err);
+ if (local_err) {
+ object_property_del(container_get(object_get_root(), "/objects"),
+ id, &error_abort);
+ goto out;
+ }
out:
if (local_err) {
error_propagate(errp, local_err);
@@ -623,3 +630,32 @@ void qmp_object_del(const char *id, Error **errp)
}
object_unparent(obj);
}
+
+MemoryDeviceInfoList *qmp_query_memory_devices(Error **errp)
+{
+ MemoryDeviceInfoList *head = NULL;
+ MemoryDeviceInfoList **prev = &head;
+
+ qmp_pc_dimm_device_list(qdev_get_machine(), &prev);
+
+ return head;
+}
+
+ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp)
+{
+ bool ambig;
+ ACPIOSTInfoList *head = NULL;
+ ACPIOSTInfoList **prev = &head;
+ Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, &ambig);
+
+ if (obj) {
+ AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj);
+ AcpiDeviceIf *adev = ACPI_DEVICE_IF(obj);
+
+ adevc->ospm_status(adev, &prev);
+ } else {
+ error_setg(errp, "command is not supported, missing ACPI device");
+ }
+
+ return head;
+}
diff --git a/qom/object.c b/qom/object.c
index e42b254303..3876618c2e 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -13,6 +13,7 @@
#include "qom/object.h"
#include "qemu-common.h"
#include "qapi/visitor.h"
+#include "qapi-visit.h"
#include "qapi/string-input-visitor.h"
#include "qapi/string-output-visitor.h"
#include "qapi/qmp/qerror.h"
@@ -938,6 +939,40 @@ int64_t object_property_get_int(Object *obj, const char *name,
return retval;
}
+int object_property_get_enum(Object *obj, const char *name,
+ const char *strings[], Error **errp)
+{
+ StringOutputVisitor *sov;
+ StringInputVisitor *siv;
+ int ret;
+
+ sov = string_output_visitor_new(false);
+ object_property_get(obj, string_output_get_visitor(sov), name, errp);
+ siv = string_input_visitor_new(string_output_get_string(sov));
+ string_output_visitor_cleanup(sov);
+ visit_type_enum(string_input_get_visitor(siv),
+ &ret, strings, NULL, name, errp);
+ string_input_visitor_cleanup(siv);
+
+ return ret;
+}
+
+void object_property_get_uint16List(Object *obj, const char *name,
+ uint16List **list, Error **errp)
+{
+ StringOutputVisitor *ov;
+ StringInputVisitor *iv;
+
+ ov = string_output_visitor_new(false);
+ object_property_get(obj, string_output_get_visitor(ov),
+ name, errp);
+ iv = string_input_visitor_new(string_output_get_string(ov));
+ visit_type_uint16List(string_input_get_visitor(iv),
+ list, NULL, errp);
+ string_output_visitor_cleanup(ov);
+ string_input_visitor_cleanup(iv);
+}
+
void object_property_parse(Object *obj, const char *string,
const char *name, Error **errp)
{
diff --git a/savevm.c b/savevm.c
index 7b2c410ea1..6cbdaacacd 100644
--- a/savevm.c
+++ b/savevm.c
@@ -42,7 +42,6 @@
#include "block/snapshot.h"
#include "block/qapi.h"
-#define SELF_ANNOUNCE_ROUNDS 5
#ifndef ETH_P_RARP
#define ETH_P_RARP 0x8035
@@ -98,7 +97,7 @@ static void qemu_announce_self_once(void *opaque)
if (--count) {
/* delay 50ms, 150ms, 250ms, ... */
timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
- 50 + (SELF_ANNOUNCE_ROUNDS - count - 1) * 100);
+ self_announce_delay(count));
} else {
timer_del(timer);
timer_free(timer);
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index d99e2b9259..997d68d5b9 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -1,4 +1,6 @@
stub-obj-y += arch-query-cpu-def.o
+stub-obj-y += bdrv-commit-all.o
+stub-obj-y += chr-msmouse.o
stub-obj-y += clock-warp.o
stub-obj-y += cpu-get-clock.o
stub-obj-y += cpu-get-icount.o
@@ -9,13 +11,18 @@ stub-obj-y += fdset-get-fd.o
stub-obj-y += fdset-remove-fd.o
stub-obj-y += gdbstub.o
stub-obj-y += get-fd.o
+stub-obj-y += get-next-serial.o
stub-obj-y += get-vm-name.o
stub-obj-y += iothread-lock.o
+stub-obj-y += is-daemonized.o
+stub-obj-y += machine-init-done.o
stub-obj-y += migr-blocker.o
stub-obj-y += mon-is-qmp.o
stub-obj-y += mon-printf.o
stub-obj-y += mon-protocol-event.o
stub-obj-y += mon-set-error.o
+stub-obj-y += monitor-init.o
+stub-obj-y += notify-event.o
stub-obj-y += pci-drive-hot-add.o
stub-obj-y += qtest.o
stub-obj-y += reset.o
@@ -24,8 +31,10 @@ stub-obj-y += set-fd-handler.o
stub-obj-y += slirp.o
stub-obj-y += sysbus.o
stub-obj-y += uuid.o
+stub-obj-y += vc-init.o
stub-obj-y += vm-stop.o
stub-obj-y += vmstate.o
stub-obj-$(CONFIG_WIN32) += fd-register.o
stub-obj-y += cpus.o
stub-obj-y += kvm.o
+stub-obj-y += qmp_pc_dimm_device_list.o
diff --git a/stubs/bdrv-commit-all.c b/stubs/bdrv-commit-all.c
new file mode 100644
index 0000000000..a8e0a95417
--- /dev/null
+++ b/stubs/bdrv-commit-all.c
@@ -0,0 +1,7 @@
+#include "qemu-common.h"
+#include "block/block.h"
+
+int bdrv_commit_all(void)
+{
+ return 0;
+}
diff --git a/stubs/chr-msmouse.c b/stubs/chr-msmouse.c
new file mode 100644
index 0000000000..812f8b0abe
--- /dev/null
+++ b/stubs/chr-msmouse.c
@@ -0,0 +1,7 @@
+#include "qemu-common.h"
+#include "sysemu/char.h"
+
+CharDriverState *qemu_chr_open_msmouse(void)
+{
+ return 0;
+}
diff --git a/stubs/get-next-serial.c b/stubs/get-next-serial.c
new file mode 100644
index 0000000000..40c56d13d7
--- /dev/null
+++ b/stubs/get-next-serial.c
@@ -0,0 +1,3 @@
+#include "qemu-common.h"
+
+CharDriverState *serial_hds[0];
diff --git a/stubs/is-daemonized.c b/stubs/is-daemonized.c
new file mode 100644
index 0000000000..c0ee9171a7
--- /dev/null
+++ b/stubs/is-daemonized.c
@@ -0,0 +1,9 @@
+#include "qemu-common.h"
+
+/* Win32 has its own inline stub */
+#ifndef _WIN32
+bool is_daemonized(void)
+{
+ return false;
+}
+#endif
diff --git a/stubs/machine-init-done.c b/stubs/machine-init-done.c
new file mode 100644
index 0000000000..28a92555b6
--- /dev/null
+++ b/stubs/machine-init-done.c
@@ -0,0 +1,6 @@
+#include "qemu-common.h"
+#include "sysemu/sysemu.h"
+
+void qemu_add_machine_init_done_notifier(Notifier *notify)
+{
+}
diff --git a/stubs/monitor-init.c b/stubs/monitor-init.c
new file mode 100644
index 0000000000..563902b412
--- /dev/null
+++ b/stubs/monitor-init.c
@@ -0,0 +1,6 @@
+#include "qemu-common.h"
+#include "monitor/monitor.h"
+
+void monitor_init(CharDriverState *chr, int flags)
+{
+}
diff --git a/stubs/notify-event.c b/stubs/notify-event.c
new file mode 100644
index 0000000000..32f7289d3a
--- /dev/null
+++ b/stubs/notify-event.c
@@ -0,0 +1,6 @@
+#include "qemu-common.h"
+#include "qemu/main-loop.h"
+
+void qemu_notify_event(void)
+{
+}
diff --git a/stubs/qmp_pc_dimm_device_list.c b/stubs/qmp_pc_dimm_device_list.c
new file mode 100644
index 0000000000..5cb220c66c
--- /dev/null
+++ b/stubs/qmp_pc_dimm_device_list.c
@@ -0,0 +1,7 @@
+#include "qom/object.h"
+#include "hw/mem/pc-dimm.h"
+
+int qmp_pc_dimm_device_list(Object *obj, void *opaque)
+{
+ return 0;
+}
diff --git a/stubs/vc-init.c b/stubs/vc-init.c
new file mode 100644
index 0000000000..2af054fe6b
--- /dev/null
+++ b/stubs/vc-init.c
@@ -0,0 +1,7 @@
+#include "qemu-common.h"
+#include "ui/console.h"
+
+CharDriverState *vc_init(ChardevVC *vc)
+{
+ return 0;
+}
diff --git a/tests/Makefile b/tests/Makefile
index 361bb7b6e3..4caf7deb89 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -156,6 +156,7 @@ gcov-files-i386-y += hw/usb/hcd-ehci.c
gcov-files-i386-y += hw/usb/hcd-uhci.c
gcov-files-i386-y += hw/usb/dev-hid.c
gcov-files-i386-y += hw/usb/dev-storage.c
+#check-qtest-i386-y += tests/vhost-user-test$(EXESUF)
check-qtest-x86_64-y = $(check-qtest-i386-y)
gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@@ -322,9 +323,12 @@ tests/es1370-test$(EXESUF): tests/es1370-test.o
tests/intel-hda-test$(EXESUF): tests/intel-hda-test.o
tests/ioh3420-test$(EXESUF): tests/ioh3420-test.o
tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-pc-obj-y)
+tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o libqemuutil.a libqemustub.a
tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o libqemuutil.a libqemustub.a
+#LIBS+= -lutil
+
# QTest rules
TARGETS=$(patsubst %-softmmu,%, $(filter %-softmmu,$(TARGET_DIRS)))
diff --git a/tests/acpi-test-data/pc/DSDT b/tests/acpi-test-data/pc/DSDT
index d0bb3de79d..7ed03fd37e 100644
--- a/tests/acpi-test-data/pc/DSDT
+++ b/tests/acpi-test-data/pc/DSDT
Binary files differ
diff --git a/tests/acpi-test-data/pc/SSDT b/tests/acpi-test-data/pc/SSDT
index c987fb2379..eb2d8b698c 100644
--- a/tests/acpi-test-data/pc/SSDT
+++ b/tests/acpi-test-data/pc/SSDT
Binary files differ
diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT
index fc5b970009..2d2bc4adaf 100644
--- a/tests/acpi-test-data/q35/DSDT
+++ b/tests/acpi-test-data/q35/DSDT
Binary files differ
diff --git a/tests/acpi-test-data/q35/SSDT b/tests/acpi-test-data/q35/SSDT
index 9199638757..778b79bf42 100644
--- a/tests/acpi-test-data/q35/SSDT
+++ b/tests/acpi-test-data/q35/SSDT
Binary files differ
diff --git a/tests/test-string-input-visitor.c b/tests/test-string-input-visitor.c
index 877e737714..8e3433e0c7 100644
--- a/tests/test-string-input-visitor.c
+++ b/tests/test-string-input-visitor.c
@@ -64,6 +64,33 @@ static void test_visitor_in_int(TestInputVisitorData *data,
g_assert_cmpint(res, ==, value);
}
+static void test_visitor_in_intList(TestInputVisitorData *data,
+ const void *unused)
+{
+ int64_t value[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 20};
+ int16List *res = NULL, *tmp;
+ Visitor *v;
+ int i = 0;
+
+ v = visitor_input_test_init(data, "1,2,0,2-4,20,5-9,1-8");
+
+ visit_type_int16List(v, &res, NULL, &error_abort);
+ tmp = res;
+ while (i < sizeof(value) / sizeof(value[0])) {
+ g_assert(tmp);
+ g_assert_cmpint(tmp->value, ==, value[i++]);
+ tmp = tmp->next;
+ }
+ g_assert(!tmp);
+
+ tmp = res;
+ while (tmp) {
+ res = res->next;
+ g_free(tmp);
+ tmp = res;
+ }
+}
+
static void test_visitor_in_bool(TestInputVisitorData *data,
const void *unused)
{
@@ -170,6 +197,7 @@ static void test_visitor_in_fuzz(TestInputVisitorData *data,
const void *unused)
{
int64_t ires;
+ intList *ilres;
bool bres;
double nres;
char *sres;
@@ -193,6 +221,11 @@ static void test_visitor_in_fuzz(TestInputVisitorData *data,
v = visitor_input_test_init(data, buf);
visit_type_int(v, &ires, NULL, NULL);
+ visitor_input_teardown(data, NULL);
+
+ v = visitor_input_test_init(data, buf);
+ visit_type_intList(v, &ilres, NULL, NULL);
+ visitor_input_teardown(data, NULL);
v = visitor_input_test_init(data, buf);
visit_type_bool(v, &bres, NULL, NULL);
@@ -200,11 +233,13 @@ static void test_visitor_in_fuzz(TestInputVisitorData *data,
v = visitor_input_test_init(data, buf);
visit_type_number(v, &nres, NULL, NULL);
+ visitor_input_teardown(data, NULL);
v = visitor_input_test_init(data, buf);
sres = NULL;
visit_type_str(v, &sres, NULL, NULL);
g_free(sres);
+ visitor_input_teardown(data, NULL);
v = visitor_input_test_init(data, buf);
visit_type_EnumOne(v, &eres, NULL, NULL);
@@ -228,6 +263,8 @@ int main(int argc, char **argv)
input_visitor_test_add("/string-visitor/input/int",
&in_visitor_data, test_visitor_in_int);
+ input_visitor_test_add("/string-visitor/input/intList",
+ &in_visitor_data, test_visitor_in_intList);
input_visitor_test_add("/string-visitor/input/bool",
&in_visitor_data, test_visitor_in_bool);
input_visitor_test_add("/string-visitor/input/number",
diff --git a/tests/test-string-output-visitor.c b/tests/test-string-output-visitor.c
index 2af5a21ab5..28e7359a2a 100644
--- a/tests/test-string-output-visitor.c
+++ b/tests/test-string-output-visitor.c
@@ -44,7 +44,7 @@ static void visitor_output_teardown(TestOutputVisitorData *data,
static void test_visitor_out_int(TestOutputVisitorData *data,
const void *unused)
{
- int64_t value = -42;
+ int64_t value = 42;
Error *err = NULL;
char *str;
@@ -53,10 +53,42 @@ static void test_visitor_out_int(TestOutputVisitorData *data,
str = string_output_get_string(data->sov);
g_assert(str != NULL);
- g_assert_cmpstr(str, ==, "-42");
+ g_assert_cmpstr(str, ==, "42");
g_free(str);
}
+static void test_visitor_out_intList(TestOutputVisitorData *data,
+ const void *unused)
+{
+ int64_t value[] = {0, 1, 9, 10, 16, 15, 14,
+ 3, 4, 5, 6, 11, 12, 13, 21, 22, INT64_MAX - 1, INT64_MAX};
+ intList *list = NULL, **tmp = &list;
+ int i;
+ Error *errp = NULL;
+ char *str;
+
+ for (i = 0; i < sizeof(value) / sizeof(value[0]); i++) {
+ *tmp = g_malloc0(sizeof(**tmp));
+ (*tmp)->value = value[i];
+ tmp = &(*tmp)->next;
+ }
+
+ visit_type_intList(data->ov, &list, NULL, &errp);
+ g_assert(errp == NULL);
+
+ str = string_output_get_string(data->sov);
+ g_assert(str != NULL);
+ g_assert_cmpstr(str, ==,
+ "0-1,3-6,9-16,21-22,9223372036854775806-9223372036854775807");
+ g_free(str);
+ while (list) {
+ intList *tmp2;
+ tmp2 = list->next;
+ g_free(list);
+ list = tmp2;
+ }
+}
+
static void test_visitor_out_bool(TestOutputVisitorData *data,
const void *unused)
{
@@ -182,6 +214,8 @@ int main(int argc, char **argv)
&out_visitor_data, test_visitor_out_enum);
output_visitor_test_add("/string-visitor/output/enum-errors",
&out_visitor_data, test_visitor_out_enum_errors);
+ output_visitor_test_add("/string-visitor/output/intList",
+ &out_visitor_data, test_visitor_out_intList);
g_test_run();
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
new file mode 100644
index 0000000000..7c826b49e5
--- /dev/null
+++ b/tests/vhost-user-test.c
@@ -0,0 +1,312 @@
+/*
+ * QTest testcase for the vhost-user
+ *
+ * Copyright (c) 2014 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "libqtest.h"
+#include "qemu/option.h"
+#include "sysemu/char.h"
+#include "sysemu/sysemu.h"
+
+#include <glib.h>
+#include <linux/vhost.h>
+#include <sys/mman.h>
+#include <sys/vfs.h>
+#include <qemu/sockets.h>
+
+#define QEMU_CMD_ACCEL " -machine accel=tcg"
+#define QEMU_CMD_MEM " -m 512 -object memory-backend-file,id=mem,size=512M,"\
+ "mem-path=%s,share=on -numa node,memdev=mem"
+#define QEMU_CMD_CHR " -chardev socket,id=chr0,path=%s"
+#define QEMU_CMD_NETDEV " -netdev vhost-user,id=net0,chardev=chr0,vhostforce"
+#define QEMU_CMD_NET " -device virtio-net-pci,netdev=net0 "
+#define QEMU_CMD_ROM " -option-rom ../pc-bios/pxe-virtio.rom"
+
+#define QEMU_CMD QEMU_CMD_ACCEL QEMU_CMD_MEM QEMU_CMD_CHR \
+ QEMU_CMD_NETDEV QEMU_CMD_NET QEMU_CMD_ROM
+
+#define HUGETLBFS_MAGIC 0x958458f6
+
+/*********** FROM hw/virtio/vhost-user.c *************************************/
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+typedef enum VhostUserRequest {
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+ uint64_t guest_phys_addr;
+ uint64_t memory_size;
+ uint64_t userspace_addr;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+ uint32_t nregions;
+ uint32_t padding;
+ VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+ VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK (0x3)
+#define VHOST_USER_REPLY_MASK (0x1<<2)
+ uint32_t flags;
+ uint32_t size; /* the following payload size */
+ union {
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ VhostUserMemory memory;
+ };
+} QEMU_PACKED VhostUserMsg;
+
+static VhostUserMsg m __attribute__ ((unused));
+#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
+ + sizeof(m.flags) \
+ + sizeof(m.size))
+
+#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION (0x1)
+/*****************************************************************************/
+
+int fds_num = 0, fds[VHOST_MEMORY_MAX_NREGIONS];
+static VhostUserMemory memory;
+static GMutex data_mutex;
+static GCond data_cond;
+
+static void read_guest_mem(void)
+{
+ uint32_t *guest_mem;
+ gint64 end_time;
+ int i, j;
+
+ g_mutex_lock(&data_mutex);
+
+ end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
+ while (!fds_num) {
+ if (!g_cond_wait_until(&data_cond, &data_mutex, end_time)) {
+ /* timeout has passed */
+ g_assert(fds_num);
+ break;
+ }
+ }
+
+ /* check for sanity */
+ g_assert_cmpint(fds_num, >, 0);
+ g_assert_cmpint(fds_num, ==, memory.nregions);
+
+ /* iterate all regions */
+ for (i = 0; i < fds_num; i++) {
+
+ /* We'll check only the region starting at 0x0 */
+ if (memory.regions[i].guest_phys_addr != 0x0) {
+ continue;
+ }
+
+ g_assert_cmpint(memory.regions[i].memory_size, >, 1024);
+
+ guest_mem = mmap(0, memory.regions[i].memory_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED, fds[i], 0);
+
+ for (j = 0; j < 256; j++) {
+ uint32_t a = readl(memory.regions[i].guest_phys_addr + j*4);
+ uint32_t b = guest_mem[j];
+
+ g_assert_cmpint(a, ==, b);
+ }
+
+ munmap(guest_mem, memory.regions[i].memory_size);
+ }
+
+ g_assert_cmpint(1, ==, 1);
+ g_mutex_unlock(&data_mutex);
+}
+
+static void *thread_function(void *data)
+{
+ GMainLoop *loop;
+ loop = g_main_loop_new(NULL, FALSE);
+ g_main_loop_run(loop);
+ return NULL;
+}
+
+static int chr_can_read(void *opaque)
+{
+ return VHOST_USER_HDR_SIZE;
+}
+
+static void chr_read(void *opaque, const uint8_t *buf, int size)
+{
+ CharDriverState *chr = opaque;
+ VhostUserMsg msg;
+ uint8_t *p = (uint8_t *) &msg;
+ int fd;
+
+ if (size != VHOST_USER_HDR_SIZE) {
+ g_test_message("Wrong message size received %d\n", size);
+ return;
+ }
+
+ memcpy(p, buf, VHOST_USER_HDR_SIZE);
+
+ if (msg.size) {
+ p += VHOST_USER_HDR_SIZE;
+ qemu_chr_fe_read_all(chr, p, msg.size);
+ }
+
+ switch (msg.request) {
+ case VHOST_USER_GET_FEATURES:
+ /* send back features to qemu */
+ msg.flags |= VHOST_USER_REPLY_MASK;
+ msg.size = sizeof(m.u64);
+ msg.u64 = 0;
+ p = (uint8_t *) &msg;
+ qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
+ break;
+
+ case VHOST_USER_GET_VRING_BASE:
+ /* send back vring base to qemu */
+ msg.flags |= VHOST_USER_REPLY_MASK;
+ msg.size = sizeof(m.state);
+ msg.state.num = 0;
+ p = (uint8_t *) &msg;
+ qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
+ break;
+
+ case VHOST_USER_SET_MEM_TABLE:
+ /* received the mem table */
+ memcpy(&memory, &msg.memory, sizeof(msg.memory));
+ fds_num = qemu_chr_fe_get_msgfds(chr, fds, sizeof(fds) / sizeof(int));
+
+ /* signal the test that it can continue */
+ g_cond_signal(&data_cond);
+ g_mutex_unlock(&data_mutex);
+ break;
+
+ case VHOST_USER_SET_VRING_KICK:
+ case VHOST_USER_SET_VRING_CALL:
+ /* consume the fd */
+ qemu_chr_fe_get_msgfds(chr, &fd, 1);
+ /*
+ * This is a non-blocking eventfd.
+ * The receive function forces it to be blocking,
+ * so revert it back to non-blocking.
+ */
+ qemu_set_nonblock(fd);
+ break;
+ default:
+ break;
+ }
+}
+
+static const char *init_hugepagefs(void)
+{
+ const char *path;
+ struct statfs fs;
+ int ret;
+
+ path = getenv("QTEST_HUGETLBFS_PATH");
+ if (!path) {
+ path = "/hugetlbfs";
+ }
+
+ if (access(path, R_OK | W_OK | X_OK)) {
+ g_test_message("access on path (%s): %s\n", path, strerror(errno));
+ return NULL;
+ }
+
+ do {
+ ret = statfs(path, &fs);
+ } while (ret != 0 && errno == EINTR);
+
+ if (ret != 0) {
+ g_test_message("statfs on path (%s): %s\n", path, strerror(errno));
+ return NULL;
+ }
+
+ if (fs.f_type != HUGETLBFS_MAGIC) {
+ g_test_message("Warning: path not on HugeTLBFS: %s\n", path);
+ return NULL;
+ }
+
+ return path;
+}
+
+int main(int argc, char **argv)
+{
+ QTestState *s = NULL;
+ CharDriverState *chr = NULL;
+ const char *hugefs = 0;
+ char *socket_path = 0;
+ char *qemu_cmd = 0;
+ char *chr_path = 0;
+ int ret;
+
+ g_test_init(&argc, &argv, NULL);
+
+ module_call_init(MODULE_INIT_QOM);
+
+ hugefs = init_hugepagefs();
+ if (!hugefs) {
+ return 0;
+ }
+
+ socket_path = g_strdup_printf("/tmp/vhost-%d.sock", getpid());
+
+ /* create char dev and add read handlers */
+ qemu_add_opts(&qemu_chardev_opts);
+ chr_path = g_strdup_printf("unix:%s,server,nowait", socket_path);
+ chr = qemu_chr_new("chr0", chr_path, NULL);
+ g_free(chr_path);
+ qemu_chr_add_handlers(chr, chr_can_read, chr_read, NULL, chr);
+
+ /* run the main loop thread so the chardev may operate */
+ g_mutex_init(&data_mutex);
+ g_cond_init(&data_cond);
+ g_mutex_lock(&data_mutex);
+ g_thread_new(NULL, thread_function, NULL);
+
+ qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path);
+ s = qtest_start(qemu_cmd);
+ g_free(qemu_cmd);
+
+ qtest_add_func("/vhost-user/read-guest-mem", read_guest_mem);
+
+ ret = g_test_run();
+
+ if (s) {
+ qtest_quit(s);
+ }
+
+ /* cleanup */
+ unlink(socket_path);
+ g_free(socket_path);
+ g_cond_clear(&data_cond);
+ g_mutex_clear(&data_mutex);
+
+ return ret;
+}
diff --git a/trace-events b/trace-events
index f8dff485b2..ba01ad52cf 100644
--- a/trace-events
+++ b/trace-events
@@ -1272,6 +1272,23 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad
pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x"
pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x"
+#hw/acpi/memory_hotplug.c
+mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32
+mhp_acpi_read_addr_lo(uint32_t slot, uint32_t addr) "slot[0x%"PRIx32"] addr lo: 0x%"PRIx32
+mhp_acpi_read_addr_hi(uint32_t slot, uint32_t addr) "slot[0x%"PRIx32"] addr hi: 0x%"PRIx32
+mhp_acpi_read_size_lo(uint32_t slot, uint32_t size) "slot[0x%"PRIx32"] size lo: 0x%"PRIx32
+mhp_acpi_read_size_hi(uint32_t slot, uint32_t size) "slot[0x%"PRIx32"] size hi: 0x%"PRIx32
+mhp_acpi_read_pxm(uint32_t slot, uint32_t pxm) "slot[0x%"PRIx32"] proximity: 0x%"PRIx32
+mhp_acpi_read_flags(uint32_t slot, uint32_t flags) "slot[0x%"PRIx32"] flags: 0x%"PRIx32
+mhp_acpi_write_slot(uint32_t slot) "set active slot: 0x%"PRIx32
+mhp_acpi_write_ost_ev(uint32_t slot, uint32_t ev) "slot[0x%"PRIx32"] OST EVENT: 0x%"PRIx32
+mhp_acpi_write_ost_status(uint32_t slot, uint32_t st) "slot[0x%"PRIx32"] OST STATUS: 0x%"PRIx32
+mhp_acpi_clear_insert_evt(uint32_t slot) "slot[0x%"PRIx32"] clear insert event"
+
+#hw/i386/pc.c
+mhp_pc_dimm_assigned_slot(int slot) "0x%d"
+mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64
+
# target-s390x/kvm.c
kvm_enable_cmma(int rc) "CMMA: enabling with result code %d"
kvm_clear_cmma(int rc) "CMMA: clearing with result code %d"
diff --git a/translate-all.c b/translate-all.c
index 6b7b46e761..5425d038d9 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -295,14 +295,7 @@ void page_size_init(void)
{
/* NOTE: we can always suppose that qemu_host_page_size >=
TARGET_PAGE_SIZE */
-#ifdef _WIN32
- SYSTEM_INFO system_info;
-
- GetSystemInfo(&system_info);
- qemu_real_host_page_size = system_info.dwPageSize;
-#else
qemu_real_host_page_size = getpagesize();
-#endif
if (qemu_host_page_size == 0) {
qemu_host_page_size = qemu_real_host_page_size;
}
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 8e9c770d28..1524ead755 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -46,6 +46,7 @@ extern int daemon(int, int);
#else
# define QEMU_VMALLOC_ALIGN getpagesize()
#endif
+#define HUGETLBFS_MAGIC 0x958458f6
#include <termios.h>
#include <unistd.h>
@@ -58,9 +59,12 @@ extern int daemon(int, int);
#include "qemu/sockets.h"
#include <sys/mman.h>
#include <libgen.h>
+#include <setjmp.h>
+#include <sys/signal.h>
#ifdef CONFIG_LINUX
#include <sys/syscall.h>
+#include <sys/vfs.h>
#endif
#ifdef __FreeBSD__
@@ -332,3 +336,72 @@ char *qemu_get_exec_dir(void)
{
return g_strdup(exec_dir);
}
+
+/*
+ * Jump target used by sigbus_handler(); os_mem_prealloc() fills it in with
+ * sigsetjmp() before touching memory.
+ * NOTE(review): a single file-scope buffer, so concurrent callers would
+ * share it -- confirm callers are serialized.
+ */
+static sigjmp_buf sigjump;
+
+/*
+ * SIGBUS handler installed by os_mem_prealloc(): jumps back to the context
+ * saved in sigjump, so the matching sigsetjmp() returns 1.
+ * @signal is not inspected.
+ */
+static void sigbus_handler(int signal)
+{
+ siglongjmp(sigjump, 1);
+}
+
+/*
+ * Page size to use for memory backed by @fd.
+ *
+ * On CONFIG_LINUX builds, when @fd is not -1 and fstatfs() reports a
+ * hugetlbfs filesystem, the filesystem block size is returned. In every
+ * other case (no fd, fstatfs() failure, other filesystem, non-Linux build)
+ * the result is getpagesize().
+ */
+static size_t fd_getpagesize(int fd)
+{
+#ifdef CONFIG_LINUX
+ if (fd != -1) {
+ struct statfs sfs;
+ int rc;
+
+ /* keep retrying for as long as the call is merely interrupted */
+ for (;;) {
+ rc = fstatfs(fd, &sfs);
+ if (rc == 0 || errno != EINTR) {
+ break;
+ }
+ }
+
+ if (rc == 0 && sfs.f_type == HUGETLBFS_MAGIC) {
+ return sfs.f_bsize;
+ }
+ }
+#endif
+
+ return getpagesize();
+}
+
+/*
+ * Touch every page of @area so that the pages are allocated now rather
+ * than on first use.
+ *
+ * @fd: file descriptor backing @area, or -1; only used to pick the page size
+ * @area: start of the region
+ * @memory: size in bytes; rounded up to a multiple of the page size
+ *
+ * A SIGBUS handler is installed (and SIGBUS unblocked) for the duration of
+ * the call; if SIGBUS is raised while touching the pages, an error is
+ * printed and the process exits. The previous handler and signal mask are
+ * restored before returning. The first byte of each page is set to zero.
+ */
+void os_mem_prealloc(int fd, char *area, size_t memory)
+{
+ int ret;
+ size_t i, numpages;
+ struct sigaction act, oldact;
+ sigset_t set, oldset;
+ size_t hpagesize = fd_getpagesize(fd);
+
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = &sigbus_handler;
+ act.sa_flags = 0;
+
+ ret = sigaction(SIGBUS, &act, &oldact);
+ if (ret) {
+ perror("os_mem_prealloc: failed to install signal handler");
+ exit(1);
+ }
+
+ /* unblock SIGBUS */
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
+
+ /* sigbus_handler() comes back here with a non-zero return value */
+ if (sigsetjmp(sigjump, 1)) {
+ fprintf(stderr, "os_mem_prealloc: failed to preallocate pages\n");
+ exit(1);
+ }
+
+ /* MAP_POPULATE silently ignores failures */
+ /* the mask trick assumes hpagesize is a power of two */
+ memory = (memory + hpagesize - 1) & -hpagesize;
+ numpages = memory / hpagesize;
+ /* index is size_t: an int would overflow for very large page counts */
+ for (i = 0; i < numpages; i++) {
+ memset(area + (hpagesize * i), 0, 1);
+ }
+
+ ret = sigaction(SIGBUS, &oldact, NULL);
+ if (ret) {
+ perror("os_mem_prealloc: failed to reinstall signal handler");
+ exit(1);
+ }
+
+ pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+}
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 157f10fab2..507cedd84d 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -446,3 +446,22 @@ gint g_poll(GPollFD *fds, guint nfds, gint timeout)
return retval;
}
+
+/*
+ * Win32 implementation of getpagesize(): returns the dwPageSize field
+ * reported by GetSystemInfo().
+ */
+size_t getpagesize(void)
+{
+ SYSTEM_INFO system_info;
+
+ GetSystemInfo(&system_info);
+ return system_info.dwPageSize;
+}
+
+/*
+ * Touch every page of @area by writing a zero to its first byte.
+ *
+ * @fd: not used by this implementation
+ * @area: start of the region
+ * @memory: size in bytes; rounded up to a multiple of getpagesize()
+ */
+void os_mem_prealloc(int fd, char *area, size_t memory)
+{
+ size_t i, numpages;
+ size_t pagesize = getpagesize();
+
+ /* the mask trick assumes pagesize is a power of two */
+ memory = (memory + pagesize - 1) & -pagesize;
+ numpages = memory / pagesize;
+ /* index is size_t: an int would overflow for very large page counts */
+ for (i = 0; i < numpages; i++) {
+ memset(area + pagesize * i, 0, 1);
+ }
+}
diff --git a/vl.c b/vl.c
index be69c7f346..54b46271c2 100644
--- a/vl.c
+++ b/vl.c
@@ -116,7 +116,7 @@ int main(int argc, char **argv)
#include "ui/qemu-spice.h"
#include "qapi/string-input-visitor.h"
-#include "qom/object_interfaces.h"
+#include "qapi/opts-visitor.h"
#define DEFAULT_RAM_SIZE 128
@@ -195,8 +195,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
QTAILQ_HEAD_INITIALIZER(fw_boot_order);
int nb_numa_nodes;
-uint64_t node_mem[MAX_NODES];
-unsigned long *node_cpumask[MAX_NODES];
+NodeInfo numa_info[MAX_NODES];
uint8_t qemu_uuid[16];
bool qemu_uuid_set;
@@ -520,6 +519,14 @@ static QemuOptsList qemu_mem_opts = {
.name = "size",
.type = QEMU_OPT_SIZE,
},
+ {
+ .name = "slots",
+ .type = QEMU_OPT_NUMBER,
+ },
+ {
+ .name = "maxmem",
+ .type = QEMU_OPT_SIZE,
+ },
{ /* end of list */ }
},
};
@@ -1267,102 +1274,6 @@ char *get_boot_devices_list(size_t *size, bool ignore_suffixes)
return list;
}
-static void numa_node_parse_cpus(int nodenr, const char *cpus)
-{
- char *endptr;
- unsigned long long value, endvalue;
-
- /* Empty CPU range strings will be considered valid, they will simply
- * not set any bit in the CPU bitmap.
- */
- if (!*cpus) {
- return;
- }
-
- if (parse_uint(cpus, &value, &endptr, 10) < 0) {
- goto error;
- }
- if (*endptr == '-') {
- if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) {
- goto error;
- }
- } else if (*endptr == '\0') {
- endvalue = value;
- } else {
- goto error;
- }
-
- if (endvalue >= MAX_CPUMASK_BITS) {
- endvalue = MAX_CPUMASK_BITS - 1;
- fprintf(stderr,
- "qemu: NUMA: A max of %d VCPUs are supported\n",
- MAX_CPUMASK_BITS);
- }
-
- if (endvalue < value) {
- goto error;
- }
-
- bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
- return;
-
-error:
- fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus);
- exit(1);
-}
-
-static void numa_add(const char *optarg)
-{
- char option[128];
- char *endptr;
- unsigned long long nodenr;
-
- optarg = get_opt_name(option, 128, optarg, ',');
- if (*optarg == ',') {
- optarg++;
- }
- if (!strcmp(option, "node")) {
-
- if (nb_numa_nodes >= MAX_NODES) {
- fprintf(stderr, "qemu: too many NUMA nodes\n");
- exit(1);
- }
-
- if (get_param_value(option, 128, "nodeid", optarg) == 0) {
- nodenr = nb_numa_nodes;
- } else {
- if (parse_uint_full(option, &nodenr, 10) < 0) {
- fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option);
- exit(1);
- }
- }
-
- if (nodenr >= MAX_NODES) {
- fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr);
- exit(1);
- }
-
- if (get_param_value(option, 128, "mem", optarg) == 0) {
- node_mem[nodenr] = 0;
- } else {
- int64_t sval;
- sval = strtosz(option, &endptr);
- if (sval < 0 || *endptr) {
- fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
- exit(1);
- }
- node_mem[nodenr] = sval;
- }
- if (get_param_value(option, 128, "cpus", optarg) != 0) {
- numa_node_parse_cpus(nodenr, option);
- }
- nb_numa_nodes++;
- } else {
- fprintf(stderr, "Invalid -numa option: %s\n", option);
- exit(1);
- }
-}
-
static QemuOptsList qemu_smp_opts = {
.name = "smp-opts",
.implied_opt_name = "cpus",
@@ -2911,43 +2822,51 @@ static int object_set_property(const char *name, const char *value, void *opaque
+/*
+ * Create an object from @opts: "qom-type" and "id" are read through an
+ * OptsVisitor and dropped from the property dict, and the remaining
+ * properties are handed to object_add(). @opaque is not used.
+ *
+ * Returns 0 on success. On failure the error is reported with
+ * qerror_report_err(), freed, and -1 is returned.
+ */
static int object_create(QemuOpts *opts, void *opaque)
{
- const char *type = qemu_opt_get(opts, "qom-type");
- const char *id = qemu_opts_id(opts);
- Error *local_err = NULL;
- Object *obj;
+ Error *err = NULL;
+ char *type = NULL;
+ char *id = NULL;
+ void *dummy = NULL;
+ OptsVisitor *ov;
+ QDict *pdict;
- g_assert(type != NULL);
+ ov = opts_visitor_new(opts);
+ pdict = qemu_opts_to_qdict(opts, NULL);
- if (id == NULL) {
- qerror_report(QERR_MISSING_PARAMETER, "id");
- return -1;
+ visit_start_struct(opts_get_visitor(ov), &dummy, NULL, NULL, 0, &err);
+ if (err) {
+ goto out;
}
- obj = object_new(type);
- if (qemu_opt_foreach(opts, object_set_property, obj, 1) < 0) {
- object_unref(obj);
- return -1;
+ qdict_del(pdict, "qom-type");
+ visit_type_str(opts_get_visitor(ov), &type, "qom-type", &err);
+ if (err) {
+ goto out;
}
- if (!object_dynamic_cast(obj, TYPE_USER_CREATABLE)) {
- error_setg(&local_err, "object '%s' isn't supported by -object",
- id);
+ qdict_del(pdict, "id");
+ visit_type_str(opts_get_visitor(ov), &id, "id", &err);
+ if (err) {
goto out;
}
- user_creatable_complete(obj, &local_err);
- if (local_err) {
+ object_add(type, id, pdict, opts_get_visitor(ov), &err);
+ if (err) {
goto out;
}
-
- object_property_add_child(container_get(object_get_root(), "/objects"),
- id, obj, &local_err);
+ visit_end_struct(opts_get_visitor(ov), &err);
+ if (err) {
+ qmp_object_del(id, NULL);
+ }
out:
- object_unref(obj);
- if (local_err) {
- qerror_report_err(local_err);
- error_free(local_err);
+ opts_visitor_cleanup(ov);
+
+ QDECREF(pdict);
+ g_free(id);
+ g_free(type);
+ g_free(dummy);
+ if (err) {
+ qerror_report_err(err);
+ /* err is still ours after reporting, as in the code being replaced */
+ error_free(err);
return -1;
}
return 0;
@@ -2991,6 +2910,8 @@ int main(int argc, char **argv, char **envp)
const char *trace_file = NULL;
const ram_addr_t default_ram_size = (ram_addr_t)DEFAULT_RAM_SIZE *
1024 * 1024;
+ ram_addr_t maxram_size = default_ram_size;
+ uint64_t ram_slots = 0;
atexit(qemu_run_exit_notifiers);
error_set_progname(argv[0]);
@@ -3024,6 +2945,7 @@ int main(int argc, char **argv, char **envp)
qemu_add_opts(&qemu_realtime_opts);
qemu_add_opts(&qemu_msg_opts);
qemu_add_opts(&qemu_name_opts);
+ qemu_add_opts(&qemu_numa_opts);
runstate_init();
@@ -3044,8 +2966,8 @@ int main(int argc, char **argv, char **envp)
translation = BIOS_ATA_TRANSLATION_AUTO;
for (i = 0; i < MAX_NODES; i++) {
- node_mem[i] = 0;
- node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS);
+ numa_info[i].node_mem = 0;
+ bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
}
nb_numa_nodes = 0;
@@ -3219,7 +3141,10 @@ int main(int argc, char **argv, char **envp)
}
break;
case QEMU_OPTION_numa:
- numa_add(optarg);
+ opts = qemu_opts_parse(qemu_find_opts("numa"), optarg, 1);
+ if (!opts) {
+ exit(1);
+ }
break;
case QEMU_OPTION_display:
display_type = select_display(optarg);
@@ -3326,6 +3251,7 @@ int main(int argc, char **argv, char **envp)
case QEMU_OPTION_m: {
uint64_t sz;
const char *mem_str;
+ const char *maxmem_str, *slots_str;
opts = qemu_opts_parse(qemu_find_opts("memory"),
optarg, 1);
@@ -3367,6 +3293,44 @@ int main(int argc, char **argv, char **envp)
error_report("ram size too large");
exit(EXIT_FAILURE);
}
+
+ /* "maxmem" and "slots" are only accepted together */
+ maxmem_str = qemu_opt_get(opts, "maxmem");
+ slots_str = qemu_opt_get(opts, "slots");
+ if (maxmem_str && slots_str) {
+ uint64_t slots;
+
+ /* maxmem may equal the initial size but must not be below it */
+ sz = qemu_opt_get_size(opts, "maxmem", 0);
+ if (sz < ram_size) {
+ fprintf(stderr, "qemu: invalid -m option value: maxmem "
+ "(%" PRIu64 ") < initial memory ("
+ RAM_ADDR_FMT ")\n", sz, ram_size);
+ exit(EXIT_FAILURE);
+ }
+
+ /* room above the initial size requires at least one slot ... */
+ slots = qemu_opt_get_number(opts, "slots", 0);
+ if ((sz > ram_size) && !slots) {
+ fprintf(stderr, "qemu: invalid -m option value: maxmem "
+ "(%" PRIu64 ") more than initial memory ("
+ RAM_ADDR_FMT ") but no hotplug slots were "
+ "specified\n", sz, ram_size);
+ exit(EXIT_FAILURE);
+ }
+
+ /* ... and slots without any room above it are rejected too */
+ if ((sz <= ram_size) && slots) {
+ fprintf(stderr, "qemu: invalid -m option value: %"
+ PRIu64 " hotplug slots were specified but "
+ "maxmem (%" PRIu64 ") <= initial memory ("
+ RAM_ADDR_FMT ")\n", slots, sz, ram_size);
+ exit(EXIT_FAILURE);
+ }
+ maxram_size = sz;
+ ram_slots = slots;
+ } else if ((!maxmem_str && slots_str) ||
+ (maxmem_str && !slots_str)) {
+ /* exactly one of the pair was given: name the missing one */
+ fprintf(stderr, "qemu: invalid -m option value: missing "
+ "'%s' option\n", slots_str ? "maxmem" : "slots");
+ exit(EXIT_FAILURE);
+ }
break;
}
#ifdef CONFIG_TPM
@@ -3964,6 +3928,8 @@ int main(int argc, char **argv, char **envp)
}
loc_set_none();
+ os_daemonize();
+
if (qemu_init_main_loop()) {
fprintf(stderr, "qemu_init_main_loop failed\n");
exit(1);
@@ -3993,6 +3959,8 @@ int main(int argc, char **argv, char **envp)
exit(1);
}
+ cpu_exec_init_all();
+
current_machine = MACHINE(object_new(object_class_get_name(
OBJECT_CLASS(machine_class))));
object_property_add_child(object_get_root(), "machine",
@@ -4205,8 +4173,6 @@ int main(int argc, char **argv, char **envp)
}
#endif
- os_daemonize();
-
if (pid_file && qemu_create_pidfile(pid_file) != 0) {
os_pidfile_error();
exit(1);
@@ -4332,8 +4298,6 @@ int main(int argc, char **argv, char **envp)
}
}
- cpu_exec_init_all();
-
blk_mig_init();
ram_mig_init();
@@ -4350,49 +4314,13 @@ int main(int argc, char **argv, char **envp)
default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
- if (nb_numa_nodes > 0) {
- int i;
-
- if (nb_numa_nodes > MAX_NODES) {
- nb_numa_nodes = MAX_NODES;
- }
-
- /* If no memory size if given for any node, assume the default case
- * and distribute the available memory equally across all nodes
- */
- for (i = 0; i < nb_numa_nodes; i++) {
- if (node_mem[i] != 0)
- break;
- }
- if (i == nb_numa_nodes) {
- uint64_t usedmem = 0;
-
- /* On Linux, the each node's border has to be 8MB aligned,
- * the final node gets the rest.
- */
- for (i = 0; i < nb_numa_nodes - 1; i++) {
- node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1);
- usedmem += node_mem[i];
- }
- node_mem[i] = ram_size - usedmem;
- }
-
- for (i = 0; i < nb_numa_nodes; i++) {
- if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) {
- break;
- }
- }
- /* assigning the VCPUs round-robin is easier to implement, guest OSes
- * must cope with this anyway, because there are BIOSes out there in
- * real machines which also use this scheme.
- */
- if (i == nb_numa_nodes) {
- for (i = 0; i < max_cpus; i++) {
- set_bit(i, node_cpumask[i % nb_numa_nodes]);
- }
- }
+ if (qemu_opts_foreach(qemu_find_opts("numa"), numa_init_func,
+ NULL, 1) != 0) {
+ exit(1);
}
+ set_numa_nodes();
+
if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) {
exit(1);
}
@@ -4435,6 +4363,8 @@ int main(int argc, char **argv, char **envp)
qdev_machine_init();
current_machine->ram_size = ram_size;
+ current_machine->maxram_size = maxram_size;
+ current_machine->ram_slots = ram_slots;
current_machine->boot_order = boot_order;
current_machine->cpu_model = cpu_model;