diff options
126 files changed, 7416 insertions, 728 deletions
diff --git a/Makefile.target b/Makefile.target index 06c1e59bc4..fc5827cd72 100644 --- a/Makefile.target +++ b/Makefile.target @@ -119,7 +119,7 @@ endif #CONFIG_BSD_USER ######################################################### # System emulator target ifdef CONFIG_SOFTMMU -obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o +obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o obj-y += qtest.o obj-y += hw/ obj-$(CONFIG_FDT) += device_tree.o diff --git a/backends/Makefile.objs b/backends/Makefile.objs index 591ddcf6f3..506a46c33b 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs @@ -6,3 +6,6 @@ common-obj-$(CONFIG_BRLAPI) += baum.o baum.o-cflags := $(SDL_CFLAGS) common-obj-$(CONFIG_TPM) += tpm.o + +common-obj-y += hostmem.o hostmem-ram.o +common-obj-$(CONFIG_LINUX) += hostmem-file.o diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c new file mode 100644 index 0000000000..51799943f1 --- /dev/null +++ b/backends/hostmem-file.c @@ -0,0 +1,134 @@ +/* + * QEMU Host Memory Backend for hugetlbfs + * + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu-common.h" +#include "sysemu/hostmem.h" +#include "sysemu/sysemu.h" +#include "qom/object_interfaces.h" + +/* hostmem-file.c */ +/** + * @TYPE_MEMORY_BACKEND_FILE: + * name of backend that uses mmap on a file descriptor + */ +#define TYPE_MEMORY_BACKEND_FILE "memory-backend-file" + +#define MEMORY_BACKEND_FILE(obj) \ + OBJECT_CHECK(HostMemoryBackendFile, (obj), TYPE_MEMORY_BACKEND_FILE) + +typedef struct HostMemoryBackendFile HostMemoryBackendFile; + +struct HostMemoryBackendFile { + HostMemoryBackend parent_obj; + + bool share; + char *mem_path; +}; + +static void +file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) +{ + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend); + + if (!backend->size) { + error_setg(errp, "can't create backend with size 0"); + return; + } + if (!fb->mem_path) { + error_setg(errp, "mem_path property not set"); + return; + } +#ifndef CONFIG_LINUX + error_setg(errp, "-mem-path not supported on this host"); +#else + if (!memory_region_size(&backend->mr)) { + backend->force_prealloc = mem_prealloc; + memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), + object_get_canonical_path(OBJECT(backend)), + backend->size, fb->share, + fb->mem_path, errp); + } +#endif +} + +static void +file_backend_class_init(ObjectClass *oc, void *data) +{ + HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc); + + bc->alloc = file_backend_memory_alloc; +} + +static char *get_mem_path(Object *o, Error **errp) +{ + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + + return g_strdup(fb->mem_path); +} + +static void set_mem_path(Object *o, const char *str, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(o); + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + + if (memory_region_size(&backend->mr)) { + error_setg(errp, "cannot change property value"); + return; + } + if (fb->mem_path) { + g_free(fb->mem_path); + } + fb->mem_path = g_strdup(str); +} + +static bool file_memory_backend_get_share(Object *o, Error **errp) +{ + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + + return fb->share; +} + +static void file_memory_backend_set_share(Object *o, bool value, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(o); + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + + if (memory_region_size(&backend->mr)) { + error_setg(errp, "cannot change property value"); + return; + } + fb->share = value; +} + +static void +file_backend_instance_init(Object *o) +{ + object_property_add_bool(o, "share", + file_memory_backend_get_share, + file_memory_backend_set_share, NULL); + object_property_add_str(o, "mem-path", get_mem_path, + set_mem_path, NULL); +} + +static const TypeInfo file_backend_info = { + .name = TYPE_MEMORY_BACKEND_FILE, + .parent = TYPE_MEMORY_BACKEND, + .class_init = file_backend_class_init, + .instance_init = file_backend_instance_init, + .instance_size = sizeof(HostMemoryBackendFile), +}; + +static void register_types(void) +{ + type_register_static(&file_backend_info); +} + +type_init(register_types); diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c new file mode 100644 index 0000000000..d9a8290dc9 --- /dev/null +++ b/backends/hostmem-ram.c @@ -0,0 +1,53 @@ +/* + * QEMU Host Memory Backend + * + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "sysemu/hostmem.h" +#include "qom/object_interfaces.h" + +#define TYPE_MEMORY_BACKEND_RAM "memory-backend-ram" + + +static void +ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) +{ + char *path; + + if (!backend->size) { + error_setg(errp, "can't create backend with size 0"); + return; + } + + path = object_get_canonical_path_component(OBJECT(backend)); + memory_region_init_ram(&backend->mr, OBJECT(backend), path, + backend->size); + g_free(path); +} + +static void +ram_backend_class_init(ObjectClass *oc, void *data) +{ + HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc); + + bc->alloc = ram_backend_memory_alloc; +} + +static const TypeInfo ram_backend_info = { + .name = TYPE_MEMORY_BACKEND_RAM, + .parent = TYPE_MEMORY_BACKEND, + .class_init = ram_backend_class_init, +}; + +static void register_types(void) +{ + type_register_static(&ram_backend_info); +} + +type_init(register_types); diff --git a/backends/hostmem.c b/backends/hostmem.c new file mode 100644 index 0000000000..ca10c51b51 --- /dev/null +++ b/backends/hostmem.c @@ -0,0 +1,375 @@ +/* + * QEMU Host Memory Backend + * + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "sysemu/hostmem.h" +#include "qapi/visitor.h" +#include "qapi-types.h" +#include "qapi-visit.h" +#include "qapi/qmp/qerror.h" +#include "qemu/config-file.h" +#include "qom/object_interfaces.h" + +#ifdef CONFIG_NUMA +#include <numaif.h> +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); +#endif + +static void +host_memory_backend_get_size(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + uint64_t value = backend->size; + + visit_type_size(v, &value, name, errp); +} + +static void +host_memory_backend_set_size(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + Error *local_err = NULL; + uint64_t value; + + if (memory_region_size(&backend->mr)) { + error_setg(&local_err, "cannot change property value"); + goto out; + } + + visit_type_size(v, &value, name, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' doesn't take value '%" + PRIu64 "'", object_get_typename(obj), name, value); + goto out; + } + backend->size = value; +out: + error_propagate(errp, local_err); +} + +static void +host_memory_backend_get_host_nodes(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + uint16List *host_nodes = NULL; + uint16List **node = &host_nodes; + unsigned long value; + + value = find_first_bit(backend->host_nodes, MAX_NODES); + if (value == MAX_NODES) { + return; + } + + *node = g_malloc0(sizeof(**node)); + (*node)->value = value; + node = &(*node)->next; + + do { + value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); + if (value == MAX_NODES) { + break; + } + + *node = g_malloc0(sizeof(**node)); + (*node)->value = value; + node = &(*node)->next; + } while (true); + + visit_type_uint16List(v, &host_nodes, name, errp); +} + +static void +host_memory_backend_set_host_nodes(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ +#ifdef CONFIG_NUMA + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + uint16List *l = NULL; + + visit_type_uint16List(v, &l, name, errp); + + while (l) { + bitmap_set(backend->host_nodes, l->value, 1); + l = l->next; + } +#else + error_setg(errp, "NUMA node binding are not supported by this QEMU"); +#endif +} + +static void +host_memory_backend_get_policy(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + int policy = backend->policy; + + visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); +} + +static void +host_memory_backend_set_policy(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + int policy; + + visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); + backend->policy = policy; + +#ifndef CONFIG_NUMA + if (policy != HOST_MEM_POLICY_DEFAULT) { + error_setg(errp, "NUMA policies are not supported by this QEMU"); + } +#endif +} + +static bool host_memory_backend_get_merge(Object *obj, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + return backend->merge; +} + +static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + if (!memory_region_size(&backend->mr)) { + backend->merge = value; + return; + } + + if (value != backend->merge) { + void *ptr = memory_region_get_ram_ptr(&backend->mr); + uint64_t sz = memory_region_size(&backend->mr); + + qemu_madvise(ptr, sz, + value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); + backend->merge = value; + } +} + +static bool host_memory_backend_get_dump(Object *obj, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + return backend->dump; +} + +static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + if (!memory_region_size(&backend->mr)) { + backend->dump = value; + return; + } + + if (value != backend->dump) { + void *ptr = memory_region_get_ram_ptr(&backend->mr); + uint64_t sz = memory_region_size(&backend->mr); + + qemu_madvise(ptr, sz, + value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); + backend->dump = value; + } +} + +static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + return backend->prealloc || backend->force_prealloc; +} + +static void host_memory_backend_set_prealloc(Object *obj, bool value, + Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + if (backend->force_prealloc) { + if (value) { + error_setg(errp, + "remove -mem-prealloc to use the prealloc property"); + return; + } + } + + if (!memory_region_size(&backend->mr)) { + backend->prealloc = value; + return; + } + + if (value && !backend->prealloc) { + int fd = memory_region_get_fd(&backend->mr); + void *ptr = memory_region_get_ram_ptr(&backend->mr); + uint64_t sz = memory_region_size(&backend->mr); + + os_mem_prealloc(fd, ptr, sz); + backend->prealloc = true; + } +} + +static void host_memory_backend_init(Object *obj) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + backend->merge = qemu_opt_get_bool(qemu_get_machine_opts(), + "mem-merge", true); + backend->dump = qemu_opt_get_bool(qemu_get_machine_opts(), + "dump-guest-core", true); + backend->prealloc = mem_prealloc; + + object_property_add_bool(obj, "merge", + host_memory_backend_get_merge, + host_memory_backend_set_merge, NULL); + object_property_add_bool(obj, "dump", + host_memory_backend_get_dump, + host_memory_backend_set_dump, NULL); + object_property_add_bool(obj, "prealloc", + host_memory_backend_get_prealloc, + host_memory_backend_set_prealloc, NULL); + object_property_add(obj, "size", "int", + host_memory_backend_get_size, + host_memory_backend_set_size, NULL, NULL, NULL); + object_property_add(obj, "host-nodes", "int", + host_memory_backend_get_host_nodes, + host_memory_backend_set_host_nodes, NULL, NULL, NULL); + object_property_add(obj, "policy", "str", + host_memory_backend_get_policy, + host_memory_backend_set_policy, NULL, NULL, NULL); +} + +static void host_memory_backend_finalize(Object *obj) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + if (memory_region_size(&backend->mr)) { + memory_region_destroy(&backend->mr); + } +} + +MemoryRegion * +host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp) +{ + return memory_region_size(&backend->mr) ? &backend->mr : NULL; +} + +static void +host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(uc); + HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); + Error *local_err = NULL; + void *ptr; + uint64_t sz; + + if (bc->alloc) { + bc->alloc(backend, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + ptr = memory_region_get_ram_ptr(&backend->mr); + sz = memory_region_size(&backend->mr); + + if (backend->merge) { + qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); + } + if (!backend->dump) { + qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); + } +#ifdef CONFIG_NUMA + unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); + /* lastbit == MAX_NODES means maxnode = 0 */ + unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); + /* ensure policy won't be ignored in case memory is preallocated + * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so + * this doesn't catch hugepage case. */ + unsigned flags = MPOL_MF_STRICT; + + /* check for invalid host-nodes and policies and give more verbose + * error messages than mbind(). */ + if (maxnode && backend->policy == MPOL_DEFAULT) { + error_setg(errp, "host-nodes must be empty for policy default," + " or you should explicitly specify a policy other" + " than default"); + return; + } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { + error_setg(errp, "host-nodes must be set for policy %s", + HostMemPolicy_lookup[backend->policy]); + return; + } + + /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 + * as argument to mbind() due to an old Linux bug (feature?) which + * cuts off the last specified node. This means backend->host_nodes + * must have MAX_NODES+1 bits available. + */ + assert(sizeof(backend->host_nodes) >= + BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); + assert(maxnode <= MAX_NODES); + if (mbind(ptr, sz, backend->policy, + maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { + error_setg_errno(errp, errno, + "cannot bind memory to host NUMA nodes"); + return; + } +#endif + /* Preallocate memory after the NUMA policy has been instantiated. + * This is necessary to guarantee memory is allocated with + * specified NUMA policy in place. + */ + if (backend->prealloc) { + os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz); + } + } +} + +static void +host_memory_backend_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = host_memory_backend_memory_complete; +} + +static const TypeInfo host_memory_backend_info = { + .name = TYPE_MEMORY_BACKEND, + .parent = TYPE_OBJECT, + .abstract = true, + .class_size = sizeof(HostMemoryBackendClass), + .class_init = host_memory_backend_class_init, + .instance_size = sizeof(HostMemoryBackend), + .instance_init = host_memory_backend_init, + .instance_finalize = host_memory_backend_finalize, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void register_types(void) +{ + type_register_static(&host_memory_backend_info); +} + +type_init(register_types); @@ -334,6 +334,7 @@ tpm="no" libssh2="" vhdx="" quorum="" +numa="" # parse CC options first for opt do @@ -1118,6 +1119,10 @@ for opt do ;; --enable-quorum) quorum="yes" ;; + --disable-numa) numa="no" + ;; + --enable-numa) numa="yes" + ;; *) echo "ERROR: unknown option $opt" echo "Try '$0 --help' for more information" @@ -1384,6 +1389,8 @@ Advanced options (experts only): --enable-vhdx enable support for the Microsoft VHDX image format --disable-quorum disable quorum block filter support --enable-quorum enable quorum block filter support + --disable-numa disable libnuma support + --enable-numa enable libnuma support NOTE: The object files are built at the place where configure is launched EOF @@ -3168,6 +3175,26 @@ if compile_prog "" "" ; then fi ########################################## +# libnuma probe + +if test "$numa" != "no" ; then + cat > $TMPC << EOF +#include <numa.h> +int main(void) { return numa_available(); } +EOF + + if compile_prog "" "-lnuma" ; then + numa=yes + libs_softmmu="-lnuma $libs_softmmu" + else + if test "$numa" = "yes" ; then + feature_not_found "numa" "install numactl devel" + fi + numa=no + fi +fi + +########################################## # signalfd probe signalfd="no" cat > $TMPC << EOF @@ -4211,6 +4238,7 @@ echo "vhdx $vhdx" echo "Quorum $quorum" echo "lzo support $lzo" echo "snappy support $snappy" +echo "NUMA host support $numa" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -4485,6 +4513,9 @@ fi if test "$vhost_scsi" = "yes" ; then echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak fi +if test "$vhost_net" = "yes" ; then + echo "CONFIG_VHOST_NET_USED=y" >> $config_host_mak +fi if test "$blobs" = "yes" ; then echo "INSTALL_BLOBS=yes" >> $config_host_mak fi @@ -5174,6 +5205,10 @@ if [ "$dtc_internal" = "yes" ]; then echo "config-host.h: subdir-dtc" >> $config_host_mak fi +if test "$numa" = "yes"; then + echo "CONFIG_NUMA=y" >> $config_host_mak +fi + # build tree in object directory in case the source is not in the current directory DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests" DIRS="$DIRS fsdev" @@ -1312,20 +1312,6 @@ static void tcg_exec_all(void) exit_request = 0; } -void set_numa_modes(void) -{ - CPUState *cpu; - int i; - - CPU_FOREACH(cpu) { - for (i = 0; i < nb_numa_nodes; i++) { - if (test_bit(cpu->cpu_index, node_cpumask[i])) { - cpu->numa_node = i; - } - } - } -} - void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg) { /* XXX: implement xxx_cpu_list for targets that still miss it */ diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 37ef90f585..8e08841760 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -44,3 +44,4 @@ CONFIG_APIC=y CONFIG_IOAPIC=y CONFIG_ICC_BUS=y CONFIG_PVPANIC=y +CONFIG_MEM_HOTPLUG=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 31bddce4f4..66557ac590 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -44,3 +44,4 @@ CONFIG_APIC=y CONFIG_IOAPIC=y CONFIG_ICC_BUS=y CONFIG_PVPANIC=y +CONFIG_MEM_HOTPLUG=y diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt index 145402e078..019db53ec8 100644 --- a/docs/qmp/qmp-events.txt +++ b/docs/qmp/qmp-events.txt @@ -1,6 +1,16 @@ QEMU Machine Protocol Events ============================ +ACPI_DEVICE_OST +--------------- + +Emitted when guest executes ACPI _OST method. + + - data: ACPIOSTInfo type as described in qapi-schema.json + +{ "event": "ACPI_DEVICE_OST", + "data": { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0 } } + BALLOON_CHANGE -------------- diff --git a/docs/specs/acpi_mem_hotplug.txt b/docs/specs/acpi_mem_hotplug.txt new file mode 100644 index 0000000000..12909940cc --- /dev/null +++ b/docs/specs/acpi_mem_hotplug.txt @@ -0,0 +1,44 @@ +QEMU<->ACPI BIOS memory hotplug interface +-------------------------------------- + +ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add +events. + +Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access): +--------------------------------------------------------------- +0xa00: + read access: + [0x0-0x3] Lo part of memory device phys address + [0x4-0x7] Hi part of memory device phys address + [0x8-0xb] Lo part of memory device size in bytes + [0xc-0xf] Hi part of memory device size in bytes + [0x10-0x13] Memory device proximity domain + [0x14] Memory device status fields + bits: + 0: Device is enabled and may be used by guest + 1: Device insert event, used to distinguish device for which + no device check event to OSPM was issued. + It's valid only when bit 1 is set. + 2-7: reserved and should be ignored by OSPM + [0x15-0x17] reserved + + write access: + [0x0-0x3] Memory device slot selector, selects active memory device. + All following accesses to other registers in 0xa00-0xa17 + region will read/store data from/to selected memory device. + [0x4-0x7] OST event code reported by OSPM + [0x8-0xb] OST status code reported by OSPM + [0xc-0x13] reserved, writes into it are ignored + [0x14] Memory device control fields + bits: + 0: reserved, OSPM must clear it before writing to register + 1: if set to 1 clears device insert event, set by OSPM + after it has emitted device check event for the + selected memory device + 2-7: reserved, OSPM must clear them before writing to register + +Selecting memory device slot beyond present range has no effect on platform: + - write accesses to memory hot-plug registers not documented above are + ignored + - read accesses to memory hot-plug registers not documented above return + all bits set to 1. diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt new file mode 100644 index 0000000000..0ea767e4b8 --- /dev/null +++ b/docs/specs/vhost-user.txt @@ -0,0 +1,266 @@ +Vhost-user Protocol +=================== + +Copyright (c) 2014 Virtual Open Systems Sarl. + +This work is licensed under the terms of the GNU GPL, version 2 or later. +See the COPYING file in the top-level directory. +=================== + +This protocol is aiming to complement the ioctl interface used to control the +vhost implementation in the Linux kernel. It implements the control plane needed +to establish virtqueue sharing with a user space process on the same host. It +uses communication over a Unix domain socket to share file descriptors in the +ancillary data of the message. + +The protocol defines 2 sides of the communication, master and slave. Master is +the application that shares its virtqueues, in our case QEMU. Slave is the +consumer of the virtqueues. + +In the current implementation QEMU is the Master, and the Slave is intended to +be a software Ethernet switch running in user space, such as Snabbswitch. + +Master and slave can be either a client (i.e. connecting) or server (listening) +in the socket communication. + +Message Specification +--------------------- + +Note that all numbers are in the machine native byte order. A vhost-user message +consists of 3 header fields and a payload: + +------------------------------------ +| request | flags | size | payload | +------------------------------------ + + * Request: 32-bit type of the request + * Flags: 32-bit bit field: + - Lower 2 bits are the version (currently 0x01) + - Bit 2 is the reply flag - needs to be sent on each reply from the slave + * Size - 32-bit size of the payload + + +Depending on the request type, payload can be: + + * A single 64-bit integer + ------- + | u64 | + ------- + + u64: a 64-bit unsigned integer + + * A vring state description + --------------- + | index | num | + --------------- + + Index: a 32-bit index + Num: a 32-bit number + + * A vring address description + -------------------------------------------------------------- + | index | flags | size | descriptor | used | available | log | + -------------------------------------------------------------- + + Index: a 32-bit vring index + Flags: a 32-bit vring flags + Descriptor: a 64-bit user address of the vring descriptor table + Used: a 64-bit user address of the vring used ring + Available: a 64-bit user address of the vring available ring + Log: a 64-bit guest address for logging + + * Memory regions description + --------------------------------------------------- + | num regions | padding | region0 | ... | region7 | + --------------------------------------------------- + + Num regions: a 32-bit number of regions + Padding: 32-bit + + A region is: + --------------------------------------- + | guest address | size | user address | + --------------------------------------- + + Guest address: a 64-bit guest address of the region + Size: a 64-bit size + User address: a 64-bit user address + + +In QEMU the vhost-user message is implemented with the following struct: + +typedef struct VhostUserMsg { + VhostUserRequest request; + uint32_t flags; + uint32_t size; + union { + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + }; +} QEMU_PACKED VhostUserMsg; + +Communication +------------- + +The protocol for vhost-user is based on the existing implementation of vhost +for the Linux Kernel. Most messages that can be sent via the Unix domain socket +implementing vhost-user have an equivalent ioctl to the kernel implementation. + +The communication consists of master sending message requests and slave sending +message replies. Most of the requests don't require replies. Here is a list of +the ones that do: + + * VHOST_GET_FEATURES + * VHOST_GET_VRING_BASE + +There are several messages that the master sends with file descriptors passed +in the ancillary data: + + * VHOST_SET_MEM_TABLE + * VHOST_SET_LOG_FD + * VHOST_SET_VRING_KICK + * VHOST_SET_VRING_CALL + * VHOST_SET_VRING_ERR + +If Master is unable to send the full message or receives a wrong reply it will +close the connection. An optional reconnection mechanism can be implemented. + +Message types +------------- + + * VHOST_USER_GET_FEATURES + + Id: 2 + Equivalent ioctl: VHOST_GET_FEATURES + Master payload: N/A + Slave payload: u64 + + Get from the underlying vhost implementation the features bitmask. + + * VHOST_USER_SET_FEATURES + + Id: 3 + Ioctl: VHOST_SET_FEATURES + Master payload: u64 + + Enable features in the underlying vhost implementation using a bitmask. + + * VHOST_USER_SET_OWNER + + Id: 4 + Equivalent ioctl: VHOST_SET_OWNER + Master payload: N/A + + Issued when a new connection is established. It sets the current Master + as an owner of the session. This can be used on the Slave as a + "session start" flag. + + * VHOST_USER_RESET_OWNER + + Id: 5 + Equivalent ioctl: VHOST_RESET_OWNER + Master payload: N/A + + Issued when a new connection is about to be closed. The Master will no + longer own this connection (and will usually close it). + + * VHOST_USER_SET_MEM_TABLE + + Id: 6 + Equivalent ioctl: VHOST_SET_MEM_TABLE + Master payload: memory regions description + + Sets the memory map regions on the slave so it can translate the vring + addresses. In the ancillary data there is an array of file descriptors + for each memory mapped region. The size and ordering of the fds matches + the number and ordering of memory regions. + + * VHOST_USER_SET_LOG_BASE + + Id: 7 + Equivalent ioctl: VHOST_SET_LOG_BASE + Master payload: u64 + + Sets the logging base address. + + * VHOST_USER_SET_LOG_FD + + Id: 8 + Equivalent ioctl: VHOST_SET_LOG_FD + Master payload: N/A + + Sets the logging file descriptor, which is passed as ancillary data. + + * VHOST_USER_SET_VRING_NUM + + Id: 9 + Equivalent ioctl: VHOST_SET_VRING_NUM + Master payload: vring state description + + Sets the number of vrings for this owner. + + * VHOST_USER_SET_VRING_ADDR + + Id: 10 + Equivalent ioctl: VHOST_SET_VRING_ADDR + Master payload: vring address description + Slave payload: N/A + + Sets the addresses of the different aspects of the vring. + + * VHOST_USER_SET_VRING_BASE + + Id: 11 + Equivalent ioctl: VHOST_SET_VRING_BASE + Master payload: vring state description + + Sets the base offset in the available vring. + + * VHOST_USER_GET_VRING_BASE + + Id: 12 + Equivalent ioctl: VHOST_USER_GET_VRING_BASE + Master payload: vring state description + Slave payload: vring state description + + Get the available vring base offset. + + * VHOST_USER_SET_VRING_KICK + + Id: 13 + Equivalent ioctl: VHOST_SET_VRING_KICK + Master payload: u64 + + Set the event file descriptor for adding buffers to the vring. It + is passed in the ancillary data. + Bits (0-7) of the payload contain the vring index. Bit 8 is the + invalid FD flag. This flag is set when there is no file descriptor + in the ancillary data. This signals that polling should be used + instead of waiting for a kick. + + * VHOST_USER_SET_VRING_CALL + + Id: 14 + Equivalent ioctl: VHOST_SET_VRING_CALL + Master payload: u64 + + Set the event file descriptor to signal when buffers are used. It + is passed in the ancillary data. + Bits (0-7) of the payload contain the vring index. Bit 8 is the + invalid FD flag. This flag is set when there is no file descriptor + in the ancillary data. This signals that polling will be used + instead of waiting for the call. + + * VHOST_USER_SET_VRING_ERR + + Id: 15 + Equivalent ioctl: VHOST_SET_VRING_ERR + Master payload: u64 + + Set the event file descriptor to signal when error occurs. It + is passed in the ancillary data. + Bits (0-7) of the payload contain the vring index. Bit 8 is the + invalid FD flag. This flag is set when there is no file descriptor + in the ancillary data. @@ -70,6 +70,12 @@ AddressSpace address_space_memory; MemoryRegion io_mem_rom, io_mem_notdirty; static MemoryRegion io_mem_unassigned; +/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ +#define RAM_PREALLOC (1 << 0) + +/* RAM is mmap-ed with MAP_SHARED */ +#define RAM_SHARED (1 << 1) + #endif struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); @@ -1011,16 +1017,10 @@ static long gethugepagesize(const char *path) return fs.f_bsize; } -static sigjmp_buf sigjump; - -static void sigbus_handler(int signal) -{ - siglongjmp(sigjump, 1); -} - static void *file_ram_alloc(RAMBlock *block, ram_addr_t memory, - const char *path) + const char *path, + Error **errp) { char *filename; char *sanitized_name; @@ -1039,7 +1039,8 @@ static void *file_ram_alloc(RAMBlock *block, } if (kvm_enabled() && !kvm_has_sync_mmu()) { - fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n"); + error_setg(errp, + "host lacks kvm mmu notifiers, -mem-path unsupported"); goto error; } @@ -1056,7 +1057,8 @@ static void *file_ram_alloc(RAMBlock *block, fd = mkstemp(filename); if (fd < 0) { - perror("unable to create backing store for hugepages"); + error_setg_errno(errp, errno, + "unable to create backing store for hugepages"); g_free(filename); goto error; } @@ -1071,53 +1073,22 @@ static void *file_ram_alloc(RAMBlock *block, * If anything goes wrong with it under other filesystems, * mmap will fail. */ - if (ftruncate(fd, memory)) + if (ftruncate(fd, memory)) { perror("ftruncate"); + } - area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + area = mmap(0, memory, PROT_READ | PROT_WRITE, + (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE), + fd, 0); if (area == MAP_FAILED) { - perror("file_ram_alloc: can't mmap RAM pages"); + error_setg_errno(errp, errno, + "unable to map backing store for hugepages"); close(fd); goto error; } if (mem_prealloc) { - int ret, i; - struct sigaction act, oldact; - sigset_t set, oldset; - - memset(&act, 0, sizeof(act)); - act.sa_handler = &sigbus_handler; - act.sa_flags = 0; - - ret = sigaction(SIGBUS, &act, &oldact); - if (ret) { - perror("file_ram_alloc: failed to install signal handler"); - exit(1); - } - - /* unblock SIGBUS */ - sigemptyset(&set); - sigaddset(&set, SIGBUS); - pthread_sigmask(SIG_UNBLOCK, &set, &oldset); - - if (sigsetjmp(sigjump, 1)) { - fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n"); - exit(1); - } - - /* MAP_POPULATE silently ignores failures */ - for (i = 0; i < (memory/hpagesize); i++) { - memset(area + (hpagesize*i), 0, 1); - } - - ret = sigaction(SIGBUS, &oldact, NULL); - if (ret) { - perror("file_ram_alloc: failed to reinstall signal handler"); - exit(1); - } - - pthread_sigmask(SIG_SETMASK, &oldset, NULL); + os_mem_prealloc(fd, area, memory); } block->fd = fd; @@ -1129,14 +1100,6 @@ error: } return NULL; } -#else -static void *file_ram_alloc(RAMBlock *block, - ram_addr_t memory, - const char *path) -{ - fprintf(stderr, "-mem-path not supported on this host\n"); - exit(1); -} #endif static ram_addr_t find_ram_offset(ram_addr_t size) @@ -1262,56 +1225,30 @@ static int memory_try_enable_merging(void *addr, size_t len) return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE); } -ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, - MemoryRegion *mr) +static ram_addr_t ram_block_add(RAMBlock *new_block) { - RAMBlock *block, *new_block; + RAMBlock *block; ram_addr_t old_ram_size, new_ram_size; old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS; - size = TARGET_PAGE_ALIGN(size); - new_block = g_malloc0(sizeof(*new_block)); - new_block->fd = -1; - /* This assumes the iothread lock is taken here too. */ qemu_mutex_lock_ramlist(); - new_block->mr = mr; - new_block->offset = find_ram_offset(size); - if (host) { - new_block->host = host; - new_block->flags |= RAM_PREALLOC_MASK; - } else if (xen_enabled()) { - if (mem_path) { - fprintf(stderr, "-mem-path not supported with Xen\n"); - exit(1); - } - xen_ram_alloc(new_block->offset, size, mr); - } else { - if (mem_path) { - if (phys_mem_alloc != qemu_anon_ram_alloc) { - /* - * file_ram_alloc() needs to allocate just like - * phys_mem_alloc, but we haven't bothered to provide - * a hook there. - */ - fprintf(stderr, - "-mem-path not supported with this accelerator\n"); - exit(1); - } - new_block->host = file_ram_alloc(new_block, size, mem_path); - } - if (!new_block->host) { - new_block->host = phys_mem_alloc(size); + new_block->offset = find_ram_offset(new_block->length); + + if (!new_block->host) { + if (xen_enabled()) { + xen_ram_alloc(new_block->offset, new_block->length, new_block->mr); + } else { + new_block->host = phys_mem_alloc(new_block->length); if (!new_block->host) { fprintf(stderr, "Cannot set up guest memory '%s': %s\n", new_block->mr->name, strerror(errno)); exit(1); } - memory_try_enable_merging(new_block->host, size); + memory_try_enable_merging(new_block->host, new_block->length); } } - new_block->length = size; /* Keep the list sorted from biggest to smallest block. */ QTAILQ_FOREACH(block, &ram_list.blocks, next) { @@ -1339,18 +1276,75 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, old_ram_size, new_ram_size); } } - cpu_physical_memory_set_dirty_range(new_block->offset, size); + cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length); - qemu_ram_setup_dump(new_block->host, size); - qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE); - qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK); + qemu_ram_setup_dump(new_block->host, new_block->length); + qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE); + qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK); - if (kvm_enabled()) - kvm_setup_guest_memory(new_block->host, size); + if (kvm_enabled()) { + kvm_setup_guest_memory(new_block->host, new_block->length); + } return new_block->offset; } +#ifdef __linux__ +ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, + bool share, const char *mem_path, + Error **errp) +{ + RAMBlock *new_block; + + if (xen_enabled()) { + error_setg(errp, "-mem-path not supported with Xen"); + return -1; + } + + if (phys_mem_alloc != qemu_anon_ram_alloc) { + /* + * file_ram_alloc() needs to allocate just like + * phys_mem_alloc, but we haven't bothered to provide + * a hook there. + */ + error_setg(errp, + "-mem-path not supported with this accelerator"); + return -1; + } + + size = TARGET_PAGE_ALIGN(size); + new_block = g_malloc0(sizeof(*new_block)); + new_block->mr = mr; + new_block->length = size; + new_block->flags = share ? RAM_SHARED : 0; + new_block->host = file_ram_alloc(new_block, size, + mem_path, errp); + if (!new_block->host) { + g_free(new_block); + return -1; + } + + return ram_block_add(new_block); +} +#endif + +ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, + MemoryRegion *mr) +{ + RAMBlock *new_block; + + size = TARGET_PAGE_ALIGN(size); + new_block = g_malloc0(sizeof(*new_block)); + new_block->mr = mr; + new_block->length = size; + new_block->fd = -1; + new_block->host = host; + if (host) { + new_block->flags |= RAM_PREALLOC; + } + return ram_block_add(new_block); +} + ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr) { return qemu_ram_alloc_from_ptr(size, NULL, mr); @@ -1385,7 +1379,7 @@ void qemu_ram_free(ram_addr_t addr) QTAILQ_REMOVE(&ram_list.blocks, block, next); ram_list.mru_block = NULL; ram_list.version++; - if (block->flags & RAM_PREALLOC_MASK) { + if (block->flags & RAM_PREALLOC) { ; } else if (xen_enabled()) { xen_invalidate_map_cache_entry(block->host); @@ -1417,7 +1411,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) offset = addr - block->offset; if (offset < block->length) { vaddr = block->host + offset; - if (block->flags & RAM_PREALLOC_MASK) { + if (block->flags & RAM_PREALLOC) { ; } else if (xen_enabled()) { abort(); @@ -1425,12 +1419,8 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) flags = MAP_FIXED; munmap(vaddr, length); if (block->fd >= 0) { -#ifdef MAP_POPULATE - flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED : - MAP_PRIVATE; -#else - flags |= MAP_PRIVATE; -#endif + flags |= (block->flags & RAM_SHARED ? + MAP_SHARED : MAP_PRIVATE); area = mmap(vaddr, length, PROT_READ | PROT_WRITE, flags, block->fd, offset); } else { @@ -1460,6 +1450,13 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) } #endif /* !_WIN32 */ +int qemu_get_ram_fd(ram_addr_t addr) +{ + RAMBlock *block = qemu_get_ram_block(addr); + + return block->fd; +} + /* Return a host pointer to ram allocated with qemu_ram_alloc. With the exception of the softmmu code in this file, this should only be used for local memory (e.g. video ram) that the device owns, diff --git a/hmp-commands.hx b/hmp-commands.hx index 5f1a677b85..d0943b1ff3 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1211,7 +1211,7 @@ ETEXI { .name = "host_net_add", .args_type = "device:s,opts:s?", - .params = "tap|user|socket|vde|netmap|bridge|dump [options]", + .params = "tap|user|socket|vde|netmap|bridge|vhost-user|dump [options]", .help = "add host VLAN client", .mhandler.cmd = net_host_device_add, .command_completion = host_net_add_completion, @@ -1241,7 +1241,7 @@ ETEXI { .name = "netdev_add", .args_type = "netdev:O", - .params = "[user|tap|socket|vde|bridge|hubport|netmap],id=str[,prop=value][,...]", + .params = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]", .help = "add host network device", .mhandler.cmd = hmp_netdev_add, .command_completion = netdev_add_completion, @@ -22,6 +22,8 @@ #include "qemu/sockets.h" #include "monitor/monitor.h" #include "qapi/opts-visitor.h" +#include "qapi/string-output-visitor.h" +#include "qapi-visit.h" #include "ui/console.h" #include "block/qapi.h" #include "qemu-io.h" @@ -1676,3 +1678,37 @@ void hmp_object_del(Monitor *mon, const QDict *qdict) qmp_object_del(id, &err); hmp_handle_error(mon, &err); } + +void hmp_info_memdev(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + MemdevList *memdev_list = qmp_query_memdev(&err); + MemdevList *m = memdev_list; + StringOutputVisitor *ov; + int i = 0; + + + while (m) { + ov = string_output_visitor_new(false); + visit_type_uint16List(string_output_get_visitor(ov), + &m->value->host_nodes, NULL, NULL); + monitor_printf(mon, "memory device %d\n", i); + monitor_printf(mon, " size: %" PRId64 "\n", m->value->size); + monitor_printf(mon, " merge: %s\n", + m->value->merge ? "true" : "false"); + monitor_printf(mon, " dump: %s\n", + m->value->dump ? "true" : "false"); + monitor_printf(mon, " prealloc: %s\n", + m->value->prealloc ? "true" : "false"); + monitor_printf(mon, " policy: %s\n", + HostMemPolicy_lookup[m->value->policy]); + monitor_printf(mon, " host nodes: %s\n", + string_output_get_string(ov)); + + string_output_visitor_cleanup(ov); + m = m->next; + i++; + } + + monitor_printf(mon, "\n"); +} @@ -93,6 +93,7 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict); void hmp_cpu_add(Monitor *mon, const QDict *qdict); void hmp_object_add(Monitor *mon, const QDict *qdict); void hmp_object_del(Monitor *mon, const QDict *qdict); +void hmp_info_memdev(Monitor *mon, const QDict *qdict); void object_add_completion(ReadLineState *rs, int nb_args, const char *str); void object_del_completion(ReadLineState *rs, int nb_args, const char *str); void device_add_completion(ReadLineState *rs, int nb_args, const char *str); diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c index 9aa6725f09..5861a5b826 100644 --- a/hw/9pfs/virtio-9p.c +++ b/hw/9pfs/virtio-9p.c @@ -299,9 +299,7 @@ static int v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp) free_out: v9fs_string_free(&fidp->fs.xattr.name); free_value: - if (fidp->fs.xattr.value) { - g_free(fidp->fs.xattr.value); - } + g_free(fidp->fs.xattr.value); return retval; } diff --git a/hw/Makefile.objs b/hw/Makefile.objs index d178b65de4..52a1464051 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -29,6 +29,7 @@ devices-dirs-$(CONFIG_SOFTMMU) += usb/ devices-dirs-$(CONFIG_VIRTIO) += virtio/ devices-dirs-$(CONFIG_SOFTMMU) += watchdog/ devices-dirs-$(CONFIG_SOFTMMU) += xen/ +devices-dirs-$(CONFIG_MEM_HOTPLUG) += mem/ devices-dirs-y += core/ common-obj-y += $(devices-dirs-y) obj-y += $(devices-dirs-y) diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 397d32babd..acd2389431 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -1 +1,3 @@ common-obj-$(CONFIG_ACPI) += core.o piix4.o ich9.o pcihp.o cpu_hotplug.o +common-obj-$(CONFIG_ACPI) += memory_hotplug.o +common-obj-$(CONFIG_ACPI) += acpi_interface.o diff --git a/hw/acpi/acpi_interface.c b/hw/acpi/acpi_interface.c new file mode 100644 index 0000000000..c181bb2262 --- /dev/null +++ b/hw/acpi/acpi_interface.c @@ -0,0 +1,15 @@ +#include "hw/acpi/acpi_dev_interface.h" +#include "qemu/module.h" + +static void register_types(void) +{ + static const TypeInfo acpi_dev_if_info = { + .name = TYPE_ACPI_DEVICE_IF, + .parent = TYPE_INTERFACE, + .class_size = sizeof(AcpiDeviceIfClass), + }; + + type_register_static(&acpi_dev_if_info); +} + +type_init(register_types) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 407ae8900c..e7d6c77b34 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -34,6 +34,7 @@ #include "exec/address-spaces.h" #include "hw/i386/ich9.h" +#include "hw/mem/pc-dimm.h" //#define DEBUG @@ -139,6 +140,23 @@ static int ich9_pm_post_load(void *opaque, int version_id) .offset = vmstate_offset_pointer(_state, _field, uint8_t), \ } +static bool vmstate_test_use_memhp(void *opaque) +{ + ICH9LPCPMRegs *s = opaque; + return s->acpi_memory_hotplug.is_enabled; +} + +static const VMStateDescription vmstate_memhp_state = { + .name = "ich9_pm/memhp", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, ICH9LPCPMRegs), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ich9_pm = { .name = "ich9_pm", .version_id = 1, @@ -155,6 +173,13 @@ const VMStateDescription vmstate_ich9_pm = { VMSTATE_UINT32(smi_en, ICH9LPCPMRegs), VMSTATE_UINT32(smi_sts, ICH9LPCPMRegs), VMSTATE_END_OF_LIST() + }, + .subsections = (VMStateSubsection[]) { + { + .vmsd = &vmstate_memhp_state, + .needed = vmstate_test_use_memhp, + }, + VMSTATE_END_OF_LIST() } }; @@ -223,6 +248,11 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, &pm->gpe_cpu, ICH9_CPU_HOTPLUG_IO_BASE); pm->cpu_added_notifier.notify = ich9_cpu_added_req; qemu_register_cpu_added_notifier(&pm->cpu_added_notifier); + + if (pm->acpi_memory_hotplug.is_enabled) { + acpi_memory_hotplug_init(pci_address_space_io(lpc_pci), OBJECT(lpc_pci), + &pm->acpi_memory_hotplug); + } } static void ich9_pm_get_gpe0_blk(Object *obj, Visitor *v, @@ -235,9 +265,25 @@ static void ich9_pm_get_gpe0_blk(Object *obj, Visitor *v, visit_type_uint32(v, &value, name, errp); } +static bool ich9_pm_get_memory_hotplug_support(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + return s->pm.acpi_memory_hotplug.is_enabled; +} + +static void ich9_pm_set_memory_hotplug_support(Object *obj, bool value, + Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + s->pm.acpi_memory_hotplug.is_enabled = value; +} + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, &pm->pm_io_base, errp); @@ -246,4 +292,27 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) NULL, NULL, pm, NULL); object_property_add_uint32_ptr(obj, ACPI_PM_PROP_GPE0_BLK_LEN, &gpe0_len, errp); + object_property_add_bool(obj, "memory-hotplug-support", + ich9_pm_get_memory_hotplug_support, + ich9_pm_set_memory_hotplug_support, + NULL); +} + +void ich9_pm_device_plug_cb(ICH9LPCPMRegs *pm, DeviceState *dev, Error **errp) +{ + if (pm->acpi_memory_hotplug.is_enabled && + object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + acpi_memory_plug_cb(&pm->acpi_regs, pm->irq, &pm->acpi_memory_hotplug, + dev, errp); + } else { + error_setg(errp, "acpi: device plug request for not supported device" + " type: %s", object_get_typename(OBJECT(dev))); + } +} + +void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(adev); + + acpi_memory_ospm_status(&s->pm.acpi_memory_hotplug, list); } diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c new file mode 100644 index 0000000000..de4ddc204f --- /dev/null +++ b/hw/acpi/memory_hotplug.c @@ -0,0 +1,245 @@ +#include "hw/acpi/memory_hotplug.h" +#include "hw/acpi/pc-hotplug.h" +#include "hw/mem/pc-dimm.h" +#include "hw/boards.h" +#include "trace.h" +#include "qapi-visit.h" +#include "monitor/monitor.h" +#include "qapi/dealloc-visitor.h" +#include "qapi/qmp-output-visitor.h" + +static ACPIOSTInfo *acpi_memory_device_status(int slot, MemStatus *mdev) +{ + ACPIOSTInfo *info = g_new0(ACPIOSTInfo, 1); + + info->slot_type = ACPI_SLOT_TYPE_DIMM; + info->slot = g_strdup_printf("%d", slot); + info->source = mdev->ost_event; + info->status = mdev->ost_status; + if (mdev->dimm) { + DeviceState *dev = DEVICE(mdev->dimm); + if (dev->id) { + info->device = g_strdup(dev->id); + info->has_device = true; + } + } + return info; +} + +void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list) +{ + int i; + + for (i = 0; i < mem_st->dev_count; i++) { + ACPIOSTInfoList *elem = g_new0(ACPIOSTInfoList, 1); + elem->value = acpi_memory_device_status(i, &mem_st->devs[i]); + elem->next = NULL; + **list = elem; + *list = &elem->next; + } +} + +static void acpi_memory_ost_mon_event(const MemHotplugState *mem_st) +{ + Visitor *v; + QObject *out_info; + QapiDeallocVisitor *md; + QmpOutputVisitor *mo = qmp_output_visitor_new(); + MemStatus *mdev = &mem_st->devs[mem_st->selector]; + ACPIOSTInfo *info = acpi_memory_device_status(mem_st->selector, mdev); + + v = qmp_output_get_visitor(mo); + visit_type_ACPIOSTInfo(v, &info, "unused", NULL); + + out_info = qmp_output_get_qobject(mo); + monitor_protocol_event(QEVENT_ACPI_OST, out_info); + qobject_decref(out_info); + + qmp_output_visitor_cleanup(mo); + md = qapi_dealloc_visitor_new(); + v = qapi_dealloc_get_visitor(md); + visit_type_ACPIOSTInfo(v, &info, "unused", NULL); + qapi_dealloc_visitor_cleanup(md); +} + +static uint64_t acpi_memory_hotplug_read(void *opaque, hwaddr addr, + unsigned int size) +{ + uint32_t val = 0; + MemHotplugState *mem_st = opaque; + MemStatus *mdev; + Object *o; + + if (mem_st->selector >= mem_st->dev_count) { + trace_mhp_acpi_invalid_slot_selected(mem_st->selector); + return 0; + } + + mdev = &mem_st->devs[mem_st->selector]; + o = OBJECT(mdev->dimm); + switch (addr) { + case 0x0: /* Lo part of phys address where DIMM is mapped */ + val = o ? object_property_get_int(o, PC_DIMM_ADDR_PROP, NULL) : 0; + trace_mhp_acpi_read_addr_lo(mem_st->selector, val); + break; + case 0x4: /* Hi part of phys address where DIMM is mapped */ + val = o ? object_property_get_int(o, PC_DIMM_ADDR_PROP, NULL) >> 32 : 0; + trace_mhp_acpi_read_addr_hi(mem_st->selector, val); + break; + case 0x8: /* Lo part of DIMM size */ + val = o ? object_property_get_int(o, PC_DIMM_SIZE_PROP, NULL) : 0; + trace_mhp_acpi_read_size_lo(mem_st->selector, val); + break; + case 0xc: /* Hi part of DIMM size */ + val = o ? object_property_get_int(o, PC_DIMM_SIZE_PROP, NULL) >> 32 : 0; + trace_mhp_acpi_read_size_hi(mem_st->selector, val); + break; + case 0x10: /* node proximity for _PXM method */ + val = o ? object_property_get_int(o, PC_DIMM_NODE_PROP, NULL) : 0; + trace_mhp_acpi_read_pxm(mem_st->selector, val); + break; + case 0x14: /* pack and return is_* fields */ + val |= mdev->is_enabled ? 1 : 0; + val |= mdev->is_inserting ? 2 : 0; + trace_mhp_acpi_read_flags(mem_st->selector, val); + break; + default: + val = ~0; + break; + } + return val; +} + +static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + MemHotplugState *mem_st = opaque; + MemStatus *mdev; + + if (!mem_st->dev_count) { + return; + } + + if (addr) { + if (mem_st->selector >= mem_st->dev_count) { + trace_mhp_acpi_invalid_slot_selected(mem_st->selector); + return; + } + } + + switch (addr) { + case 0x0: /* DIMM slot selector */ + mem_st->selector = data; + trace_mhp_acpi_write_slot(mem_st->selector); + break; + case 0x4: /* _OST event */ + mdev = &mem_st->devs[mem_st->selector]; + if (data == 1) { + /* TODO: handle device insert OST event */ + } else if (data == 3) { + /* TODO: handle device remove OST event */ + } + mdev->ost_event = data; + trace_mhp_acpi_write_ost_ev(mem_st->selector, mdev->ost_event); + break; + case 0x8: /* _OST status */ + mdev = &mem_st->devs[mem_st->selector]; + mdev->ost_status = data; + trace_mhp_acpi_write_ost_status(mem_st->selector, mdev->ost_status); + /* TODO: implement memory removal on guest signal */ + acpi_memory_ost_mon_event(mem_st); + break; + case 0x14: + mdev = &mem_st->devs[mem_st->selector]; + if (data & 2) { /* clear insert event */ + mdev->is_inserting = false; + trace_mhp_acpi_clear_insert_evt(mem_st->selector); + } + break; + } + +} +static const MemoryRegionOps acpi_memory_hotplug_ops = { + .read = acpi_memory_hotplug_read, + .write = acpi_memory_hotplug_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + }, +}; + +void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner, + MemHotplugState *state) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + + state->dev_count = machine->ram_slots; + if (!state->dev_count) { + return; + } + + state->devs = g_malloc0(sizeof(*state->devs) * state->dev_count); + memory_region_init_io(&state->io, owner, &acpi_memory_hotplug_ops, state, + "apci-mem-hotplug", ACPI_MEMORY_HOTPLUG_IO_LEN); + memory_region_add_subregion(as, ACPI_MEMORY_HOTPLUG_BASE, &state->io); +} + +void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st, + DeviceState *dev, Error **errp) +{ + MemStatus *mdev; + Error *local_err = NULL; + int slot = object_property_get_int(OBJECT(dev), "slot", &local_err); + + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (slot >= mem_st->dev_count) { + char *dev_path = object_get_canonical_path(OBJECT(dev)); + error_setg(errp, "acpi_memory_plug_cb: " + "device [%s] returned invalid memory slot[%d]", + dev_path, slot); + g_free(dev_path); + return; + } + + mdev = &mem_st->devs[slot]; + mdev->dimm = dev; + mdev->is_enabled = true; + mdev->is_inserting = true; + + /* do ACPI magic */ + ar->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS; + acpi_update_sci(ar, irq); + return; +} + +static const VMStateDescription vmstate_memhp_sts = { + .name = "memory hotplug device state", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_BOOL(is_enabled, MemStatus), + VMSTATE_BOOL(is_inserting, MemStatus), + VMSTATE_UINT32(ost_event, MemStatus), + VMSTATE_UINT32(ost_status, MemStatus), + VMSTATE_END_OF_LIST() + } +}; + +const VMStateDescription vmstate_memory_hotplug = { + .name = "memory hotplug state", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(selector, MemHotplugState), + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(devs, MemHotplugState, dev_count, + vmstate_memhp_sts, MemStatus), + VMSTATE_END_OF_LIST() + } +}; diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 252bbf2c77..b72b34e5c9 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -33,6 +33,9 @@ #include "hw/acpi/pcihp.h" #include "hw/acpi/cpu_hotplug.h" #include "hw/hotplug.h" +#include "hw/mem/pc-dimm.h" +#include "hw/acpi/memory_hotplug.h" +#include "hw/acpi/acpi_dev_interface.h" //#define DEBUG @@ -81,6 +84,8 @@ typedef struct PIIX4PMState { AcpiCpuHotplug gpe_cpu; Notifier cpu_added_notifier; + + MemHotplugState acpi_memory_hotplug; } PIIX4PMState; #define TYPE_PIIX4_PM "PIIX4_PM" @@ -244,6 +249,23 @@ static bool vmstate_test_no_use_acpi_pci_hotplug(void *opaque, int version_id) return !s->use_acpi_pci_hotplug; } +static bool vmstate_test_use_memhp(void *opaque) +{ + PIIX4PMState *s = opaque; + return s->acpi_memory_hotplug.is_enabled; +} + +static const VMStateDescription vmstate_memhp_state = { + .name = "piix4_pm/memhp", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, PIIX4PMState), + VMSTATE_END_OF_LIST() + } +}; + /* qemu-kvm 1.2 uses version 3 but advertised as 2 * To support incoming qemu-kvm 1.2 migration, change version_id * and minimum_version_id to 2 below (which breaks migration from @@ -275,6 +297,13 @@ static const VMStateDescription vmstate_acpi = { VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, PIIX4PMState, vmstate_test_use_acpi_pci_hotplug), VMSTATE_END_OF_LIST() + }, + .subsections = (VMStateSubsection[]) { + { + .vmsd = &vmstate_memhp_state, + .needed = vmstate_test_use_memhp, + }, + VMSTATE_END_OF_LIST() } }; @@ -308,19 +337,35 @@ static void piix4_pm_powerdown_req(Notifier *n, void *opaque) acpi_pm1_evt_power_down(&s->ar); } -static void piix4_pci_device_plug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +static void piix4_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { PIIX4PMState *s = PIIX4_PM(hotplug_dev); - acpi_pcihp_device_plug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev, errp); + + if (s->acpi_memory_hotplug.is_enabled && + object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + acpi_memory_plug_cb(&s->ar, s->irq, &s->acpi_memory_hotplug, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_plug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev, + errp); + } else { + error_setg(errp, "acpi: device plug request for not supported device" + " type: %s", object_get_typename(OBJECT(dev))); + } } -static void piix4_pci_device_unplug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { PIIX4PMState *s = PIIX4_PM(hotplug_dev); - acpi_pcihp_device_unplug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev, - errp); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_unplug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev, + errp); + } else { + error_setg(errp, "acpi: device unplug request for not supported device" + " type: %s", object_get_typename(OBJECT(dev))); + } } static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque) @@ -439,13 +484,17 @@ Object *piix4_pm_find(void) I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qemu_irq sci_irq, qemu_irq smi_irq, - int kvm_enabled, FWCfgState *fw_cfg) + int kvm_enabled, FWCfgState *fw_cfg, + DeviceState **piix4_pm) { DeviceState *dev; PIIX4PMState *s; dev = DEVICE(pci_create(bus, devfn, TYPE_PIIX4_PM)); qdev_prop_set_uint32(dev, "smb_io_base", smb_io_base); + if (piix4_pm) { + *piix4_pm = dev; + } s = PIIX4_PM(dev); s->irq = sci_irq; @@ -518,6 +567,17 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent, PIIX4_CPU_HOTPLUG_IO_BASE); s->cpu_added_notifier.notify = piix4_cpu_added_req; qemu_register_cpu_added_notifier(&s->cpu_added_notifier); + + if (s->acpi_memory_hotplug.is_enabled) { + acpi_memory_hotplug_init(parent, OBJECT(s), &s->acpi_memory_hotplug); + } +} + +static void piix4_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) +{ + PIIX4PMState *s = PIIX4_PM(adev); + + acpi_memory_ospm_status(&s->acpi_memory_hotplug, list); } static Property piix4_pm_properties[] = { @@ -527,6 +587,8 @@ static Property piix4_pm_properties[] = { DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), + DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState, + acpi_memory_hotplug.is_enabled, true), DEFINE_PROP_END_OF_LIST(), }; @@ -535,6 +597,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(klass); k->init = piix4_pm_initfn; k->config_write = pm_write_config; @@ -551,8 +614,9 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) */ dc->cannot_instantiate_with_device_add_yet = true; dc->hotpluggable = false; - hc->plug = piix4_pci_device_plug_cb; - hc->unplug = piix4_pci_device_unplug_cb; + hc->plug = piix4_device_plug_cb; + hc->unplug = piix4_device_unplug_cb; + adevc->ospm_status = piix4_ospm_status; } static const TypeInfo piix4_pm_info = { @@ -562,6 +626,7 @@ static const TypeInfo piix4_pm_info = { .class_init = piix4_pm_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, + { TYPE_ACPI_DEVICE_IF }, { } } }; diff --git a/hw/core/qdev.c b/hw/core/qdev.c index e65a5aa3a8..b9cd4fc814 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -34,6 +34,7 @@ #include "qapi/qmp/qjson.h" #include "monitor/monitor.h" #include "hw/hotplug.h" +#include "hw/boards.h" int qdev_hotplug = 0; static bool qdev_hot_added = false; @@ -567,32 +568,35 @@ static void bus_set_realized(Object *obj, bool value, Error **errp) { BusState *bus = BUS(obj); BusClass *bc = BUS_GET_CLASS(bus); + BusChild *kid; Error *local_err = NULL; if (value && !bus->realized) { if (bc->realize) { bc->realize(bus, &local_err); + } + /* TODO: recursive realization */ + } else if (!value && bus->realized) { + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + object_property_set_bool(OBJECT(dev), false, "realized", + &local_err); if (local_err != NULL) { - goto error; + break; } - } - } else if (!value && bus->realized) { - if (bc->unrealize) { + if (bc->unrealize && local_err == NULL) { bc->unrealize(bus, &local_err); - - if (local_err != NULL) { - goto error; - } } } - bus->realized = value; - return; + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } -error: - error_propagate(errp, local_err); + bus->realized = value; } void qbus_create_inplace(void *bus, size_t size, const char *typename, @@ -813,6 +817,18 @@ static void device_set_realized(Object *obj, bool value, Error **errp) local_err == NULL) { hotplug_handler_plug(dev->parent_bus->hotplug_handler, dev, &local_err); + } else if (local_err == NULL && + object_dynamic_cast(qdev_get_machine(), TYPE_MACHINE)) { + HotplugHandler *hotplug_ctrl; + MachineState *machine = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(machine); + + if (mc->get_hotplug_handler) { + hotplug_ctrl = mc->get_hotplug_handler(machine, dev); + if (hotplug_ctrl) { + hotplug_handler_plug(hotplug_ctrl, dev, &local_err); + } + } } if (qdev_get_vmsd(dev) && local_err == NULL) { @@ -865,6 +881,20 @@ static bool device_get_hotpluggable(Object *obj, Error **errp) dev->parent_bus->allow_hotplug); } +static bool device_get_hotplugged(Object *obj, Error **err) +{ + DeviceState *dev = DEVICE(obj); + + return dev->hotplugged; +} + +static void device_set_hotplugged(Object *obj, bool value, Error **err) +{ + DeviceState *dev = DEVICE(obj); + + dev->hotplugged = value; +} + static void device_initfn(Object *obj) { DeviceState *dev = DEVICE(obj); @@ -883,6 +913,9 @@ static void device_initfn(Object *obj) device_get_realized, device_set_realized, NULL); object_property_add_bool(obj, "hotpluggable", device_get_hotpluggable, NULL, NULL); + object_property_add_bool(obj, "hotplugged", + device_get_hotplugged, device_set_hotplugged, + &error_abort); class = object_get_class(OBJECT(dev)); do { diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index f66c349508..48014abf0a 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -9,7 +9,8 @@ obj-y += acpi-build.o obj-y += bios-linker-loader.o hw/i386/acpi-build.o: hw/i386/acpi-build.c hw/i386/acpi-dsdt.hex \ hw/i386/ssdt-proc.hex hw/i386/ssdt-pcihp.hex hw/i386/ssdt-misc.hex \ - hw/i386/acpi-dsdt.hex hw/i386/q35-acpi-dsdt.hex + hw/i386/acpi-dsdt.hex hw/i386/q35-acpi-dsdt.hex \ + hw/i386/q35-acpi-dsdt.hex hw/i386/ssdt-mem.hex iasl-option=$(shell if test -z "`$(1) $(2) 2>&1 > /dev/null`" \ ; then echo "$(2)"; else echo "$(3)"; fi ;) @@ -17,7 +18,7 @@ iasl-option=$(shell if test -z "`$(1) $(2) 2>&1 > /dev/null`" \ ifdef IASL #IASL Present. Generate hex files from .dsl hw/i386/%.hex: $(SRC_PATH)/hw/i386/%.dsl $(SRC_PATH)/scripts/acpi_extract_preprocess.py $(SRC_PATH)/scripts/acpi_extract.py - $(call quiet-command, cpp -P $(QEMU_DGFLAGS) $(QEMU_INCLUDES) $< -o $*.dsl.i.orig, " CPP $(TARGET_DIR)$*.dsl.i.orig") + $(call quiet-command, $(CPP) -x c -P $(QEMU_DGFLAGS) $(QEMU_INCLUDES) $< -o $*.dsl.i.orig, " CPP $(TARGET_DIR)$*.dsl.i.orig") $(call quiet-command, $(PYTHON) $(SRC_PATH)/scripts/acpi_extract_preprocess.py $*.dsl.i.orig > $*.dsl.i, " ACPI_PREPROCESS $(TARGET_DIR)$*.dsl.i") $(call quiet-command, $(IASL) $(call iasl-option,$(IASL),-Pn,) -vs -l -tc -p $* $*.dsl.i $(if $(V), , > /dev/null) 2>&1 ," IASL $(TARGET_DIR)$*.dsl.i") $(call quiet-command, $(PYTHON) $(SRC_PATH)/scripts/acpi_extract.py $*.lst > $*.off, " ACPI_EXTRACT $(TARGET_DIR)$*.off") diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 1e0aa09bc8..ebc5f034e3 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -37,6 +37,7 @@ #include "bios-linker-loader.h" #include "hw/loader.h" #include "hw/isa/isa.h" +#include "hw/acpi/memory_hotplug.h" /* Supported chipsets: */ #include "hw/acpi/piix4.h" @@ -667,6 +668,14 @@ static inline char acpi_get_hex(uint32_t val) #define ACPI_PCIQXL_SIZEOF (*ssdt_pciqxl_end - *ssdt_pciqxl_start) #define ACPI_PCIQXL_AML (ssdp_pcihp_aml + *ssdt_pciqxl_start) +#include "hw/i386/ssdt-mem.hex" + +/* 0x5B 0x82 DeviceOp PkgLength NameString DimmID */ +#define ACPI_MEM_OFFSET_HEX (*ssdt_mem_name - *ssdt_mem_start + 2) +#define ACPI_MEM_OFFSET_ID (*ssdt_mem_id - *ssdt_mem_start + 7) +#define ACPI_MEM_SIZEOF (*ssdt_mem_end - *ssdt_mem_start) +#define ACPI_MEM_AML (ssdm_mem_aml + *ssdt_mem_start) + #define ACPI_SSDT_SIGNATURE 0x54445353 /* SSDT */ #define ACPI_SSDT_HEADER_LENGTH 36 @@ -1003,6 +1012,8 @@ build_ssdt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu, AcpiPmInfo *pm, AcpiMiscInfo *misc, PcPciInfo *pci, PcGuestInfo *guest_info) { + MachineState *machine = MACHINE(qdev_get_machine()); + uint32_t nr_mem = machine->ram_slots; unsigned acpi_cpus = guest_info->apic_id_limit; int ssdt_start = table_data->len; uint8_t *ssdt_ptr; @@ -1031,6 +1042,9 @@ build_ssdt(GArray *table_data, GArray *linker, ACPI_BUILD_SET_LE(ssdt_ptr, sizeof(ssdp_misc_aml), ssdt_isa_pest[0], 16, misc->pvpanic_port); + ACPI_BUILD_SET_LE(ssdt_ptr, sizeof(ssdp_misc_aml), + ssdt_mctrl_nr_slots[0], 32, nr_mem); + { GArray *sb_scope = build_alloc_array(); uint8_t op = 0x10; /* ScopeOp */ @@ -1084,6 +1098,27 @@ build_ssdt(GArray *table_data, GArray *linker, build_free_array(package); } + if (nr_mem) { + assert(nr_mem <= ACPI_MAX_RAM_SLOTS); + /* build memory devices */ + for (i = 0; i < nr_mem; i++) { + char id[3]; + uint8_t *mem = acpi_data_push(sb_scope, ACPI_MEM_SIZEOF); + + snprintf(id, sizeof(id), "%02X", i); + memcpy(mem, ACPI_MEM_AML, ACPI_MEM_SIZEOF); + memcpy(mem + ACPI_MEM_OFFSET_HEX, id, 2); + memcpy(mem + ACPI_MEM_OFFSET_ID, id, 2); + } + + /* build Method(MEMORY_SLOT_NOTIFY_METHOD, 2) { + * If (LEqual(Arg0, 0x00)) {Notify(MP00, Arg1)} ... + */ + build_append_notify_method(sb_scope, + stringify(MEMORY_SLOT_NOTIFY_METHOD), + "MP%0.02X", nr_mem); + } + { AcpiBuildPciBusHotplugState hotplug_state; Object *pci_host; @@ -1132,15 +1167,22 @@ build_hpet(GArray *table_data, GArray *linker) (void *)hpet, "HPET", sizeof(*hpet), 1); } +typedef enum { + MEM_AFFINITY_NOFLAGS = 0, + MEM_AFFINITY_ENABLED = (1 << 0), + MEM_AFFINITY_HOTPLUGGABLE = (1 << 1), + MEM_AFFINITY_NON_VOLATILE = (1 << 2), +} MemoryAffinityFlags; + static void -acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, - uint64_t base, uint64_t len, int node, int enabled) +acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, + uint64_t len, int node, MemoryAffinityFlags flags) { numamem->type = ACPI_SRAT_MEMORY; numamem->length = sizeof(*numamem); memset(numamem->proximity, 0, 4); numamem->proximity[0] = node; - numamem->flags = cpu_to_le32(!!enabled); + numamem->flags = cpu_to_le32(flags); numamem->base_addr = cpu_to_le64(base); numamem->range_length = cpu_to_le64(len); } @@ -1157,6 +1199,10 @@ build_srat(GArray *table_data, GArray *linker, uint64_t curnode; int srat_start, numa_start, slots; uint64_t mem_len, mem_base, next_base; + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + ram_addr_t hotplugabble_address_space_size = + object_property_get_int(OBJECT(pcms), PC_MACHINE_MEMHP_REGION_SIZE, + NULL); srat_start = table_data->len; @@ -1188,7 +1234,7 @@ build_srat(GArray *table_data, GArray *linker, numa_start = table_data->len; numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1); + acpi_build_srat_memory(numamem, 0, 640*1024, 0, MEM_AFFINITY_ENABLED); next_base = 1024 * 1024; for (i = 1; i < guest_info->numa_nodes + 1; ++i) { mem_base = next_base; @@ -1204,19 +1250,34 @@ build_srat(GArray *table_data, GArray *linker, mem_len -= next_base - guest_info->ram_size_below_4g; if (mem_len > 0) { numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); + acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, + MEM_AFFINITY_ENABLED); } mem_base = 1ULL << 32; mem_len = next_base - guest_info->ram_size_below_4g; next_base += (1ULL << 32) - guest_info->ram_size_below_4g; } numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, 1); + acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, + MEM_AFFINITY_ENABLED); } slots = (table_data->len - numa_start) / sizeof *numamem; for (; slots < guest_info->numa_nodes + 2; slots++) { numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, 0, 0, 0, 0); + acpi_build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS); + } + + /* + * Entry is required for Windows to enable memory hotplug in OS. + * Memory devices may override proximity set by this entry, + * providing _PXM method if necessary. + */ + if (hotplugabble_address_space_size) { + numamem = acpi_data_push(table_data, sizeof *numamem); + acpi_build_srat_memory(numamem, pcms->hotplug_memory_base, + hotplugabble_address_space_size, 0, + MEM_AFFINITY_HOTPLUGGABLE | + MEM_AFFINITY_ENABLED); } build_header(linker, table_data, diff --git a/hw/i386/acpi-dsdt.dsl b/hw/i386/acpi-dsdt.dsl index 0a1e252d21..3cc0ea0f9a 100644 --- a/hw/i386/acpi-dsdt.dsl +++ b/hw/i386/acpi-dsdt.dsl @@ -306,7 +306,7 @@ DefinitionBlock ( } } -#include "hw/acpi/cpu_hotplug_defs.h" +#include "hw/acpi/pc-hotplug.h" #define CPU_STATUS_BASE PIIX4_CPU_HOTPLUG_IO_BASE #include "acpi-dsdt-cpu-hotplug.dsl" @@ -314,6 +314,7 @@ DefinitionBlock ( /**************************************************************** * General purpose events ****************************************************************/ + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj) Scope(\_GPE) { Name(_HID, "ACPI0006") @@ -330,7 +331,9 @@ DefinitionBlock ( // CPU hotplug event \_SB.PRSC() } - Method(_L03) { + Method(_E03) { + // Memory hotplug event + \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD() } Method(_L04) { } diff --git a/hw/i386/acpi-dsdt.hex.generated b/hw/i386/acpi-dsdt.hex.generated index e61572a5dd..ee490e89c3 100644 --- a/hw/i386/acpi-dsdt.hex.generated +++ b/hw/i386/acpi-dsdt.hex.generated @@ -3,12 +3,12 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x53, 0x44, 0x54, -0x80, +0x93, 0x11, 0x0, 0x0, 0x1, -0x60, +0xf5, 0x42, 0x58, 0x50, @@ -4285,8 +4285,8 @@ static unsigned char AcpiDsdtAmlCode[] = { 0xa, 0xb, 0x10, -0x42, -0xc, +0x45, +0xd, 0x5f, 0x47, 0x50, @@ -4389,12 +4389,31 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x53, 0x43, 0x14, -0x6, +0x19, 0x5f, -0x4c, +0x45, 0x30, 0x33, 0x0, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x53, +0x43, +0x4e, 0x14, 0x6, 0x5f, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 3e0ecf140d..67eb45089e 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -58,6 +58,9 @@ #include "hw/boards.h" #include "hw/pci/pci_host.h" #include "acpi-build.h" +#include "hw/mem/pc-dimm.h" +#include "trace.h" +#include "qapi/visitor.h" /* debug PC/ISA interrupts */ //#define DEBUG_IRQ @@ -701,14 +704,14 @@ static FWCfgState *bochs_bios_init(void) unsigned int apic_id = x86_cpu_apic_id_from_index(i); assert(apic_id < apic_id_limit); for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, node_cpumask[j])) { + if (test_bit(i, numa_info[j].node_cpu)) { numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); break; } } } for (i = 0; i < nb_numa_nodes; i++) { - numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]); + numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem); } fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, (1 + apic_id_limit + nb_numa_nodes) * @@ -1119,8 +1122,12 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, guest_info->apic_id_limit = pc_apic_id_limit(max_cpus); guest_info->apic_xrupt_override = kvm_allows_irq0_override(); guest_info->numa_nodes = nb_numa_nodes; - guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes * + guest_info->node_mem = g_malloc0(guest_info->numa_nodes * sizeof *guest_info->node_mem); + for (i = 0; i < nb_numa_nodes; i++) { + guest_info->node_mem[i] = numa_info[i].node_mem; + } + guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit * sizeof *guest_info->node_cpu); @@ -1128,7 +1135,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, unsigned int apic_id = x86_cpu_apic_id_from_index(i); assert(apic_id < guest_info->apic_id_limit); for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, node_cpumask[j])) { + if (test_bit(i, numa_info[j].node_cpu)) { guest_info->node_cpu[apic_id] = j; break; } @@ -1183,10 +1190,8 @@ void pc_acpi_init(const char *default_dsdt) } } -FWCfgState *pc_memory_init(MemoryRegion *system_memory, - const char *kernel_filename, - const char *kernel_cmdline, - const char *initrd_filename, +FWCfgState *pc_memory_init(MachineState *machine, + MemoryRegion *system_memory, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, MemoryRegion *rom_memory, @@ -1197,17 +1202,19 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, MemoryRegion *ram, *option_rom_mr; MemoryRegion *ram_below_4g, *ram_above_4g; FWCfgState *fw_cfg; + PCMachineState *pcms = PC_MACHINE(machine); - linux_boot = (kernel_filename != NULL); + assert(machine->ram_size == below_4g_mem_size + above_4g_mem_size); + + linux_boot = (machine->kernel_filename != NULL); /* Allocate RAM. We allocate it as a single memory region and use * aliases to address portions of it, mostly for backwards compatibility * with older qemus that used qemu_ram_alloc(). */ ram = g_malloc(sizeof(*ram)); - memory_region_init_ram(ram, NULL, "pc.ram", - below_4g_mem_size + above_4g_mem_size); - vmstate_register_ram_global(ram); + memory_region_allocate_system_memory(ram, NULL, "pc.ram", + machine->ram_size); *ram_memory = ram; ram_below_4g = g_malloc(sizeof(*ram_below_4g)); memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram, @@ -1223,6 +1230,43 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, e820_add_entry(0x100000000ULL, above_4g_mem_size, E820_RAM); } + if (!guest_info->has_reserved_memory && + (machine->ram_slots || + (machine->maxram_size > machine->ram_size))) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + + error_report("\"-memory 'slots|maxmem'\" is not supported by: %s", + mc->name); + exit(EXIT_FAILURE); + } + + /* initialize hotplug memory address space */ + if (guest_info->has_reserved_memory && + (machine->ram_size < machine->maxram_size)) { + ram_addr_t hotplug_mem_size = + machine->maxram_size - machine->ram_size; + + if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) { + error_report("unsupported amount of memory slots: %"PRIu64, + machine->ram_slots); + exit(EXIT_FAILURE); + } + + pcms->hotplug_memory_base = + ROUND_UP(0x100000000ULL + above_4g_mem_size, 1ULL << 30); + + if ((pcms->hotplug_memory_base + hotplug_mem_size) < + hotplug_mem_size) { + error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT, + machine->maxram_size); + exit(EXIT_FAILURE); + } + + memory_region_init(&pcms->hotplug_memory, OBJECT(pcms), + "hotplug-memory", hotplug_mem_size); + memory_region_add_subregion(system_memory, pcms->hotplug_memory_base, + &pcms->hotplug_memory); + } /* Initialize PC system firmware */ pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw); @@ -1238,8 +1282,15 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, fw_cfg = bochs_bios_init(); rom_set_fw(fw_cfg); + if (guest_info->has_reserved_memory && pcms->hotplug_memory_base) { + uint64_t *val = g_malloc(sizeof(*val)); + *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory_base, 0x1ULL << 30)); + fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); + } + if (linux_boot) { - load_linux(fw_cfg, kernel_filename, initrd_filename, kernel_cmdline, below_4g_mem_size); + load_linux(fw_cfg, machine->kernel_filename, machine->initrd_filename, + machine->kernel_cmdline, below_4g_mem_size); } for (i = 0; i < nb_option_roms; i++) { @@ -1455,3 +1506,178 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name) gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); } } + +static void pc_generic_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + QEMUMachine *qm = data; + + mc->name = qm->name; + mc->alias = qm->alias; + mc->desc = qm->desc; + mc->init = qm->init; + mc->reset = qm->reset; + mc->hot_add_cpu = qm->hot_add_cpu; + mc->kvm_type = qm->kvm_type; + mc->block_default_type = qm->block_default_type; + mc->max_cpus = qm->max_cpus; + mc->no_serial = qm->no_serial; + mc->no_parallel = qm->no_parallel; + mc->use_virtcon = qm->use_virtcon; + mc->use_sclp = qm->use_sclp; + mc->no_floppy = qm->no_floppy; + mc->no_cdrom = qm->no_cdrom; + mc->no_sdcard = qm->no_sdcard; + mc->is_default = qm->is_default; + mc->default_machine_opts = qm->default_machine_opts; + mc->default_boot_order = qm->default_boot_order; + mc->compat_props = qm->compat_props; + mc->hw_version = qm->hw_version; +} + +void qemu_register_pc_machine(QEMUMachine *m) +{ + char *name = g_strconcat(m->name, TYPE_MACHINE_SUFFIX, NULL); + TypeInfo ti = { + .name = name, + .parent = TYPE_PC_MACHINE, + .class_init = pc_generic_machine_class_init, + .class_data = (void *)m, + }; + + type_register(&ti); + g_free(name); +} + +static void pc_dimm_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + int slot; + HotplugHandlerClass *hhc; + Error *local_err = NULL; + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + MachineState *machine = MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &local_err); + if (local_err) { + goto out; + } + + addr = pc_dimm_get_free_addr(pcms->hotplug_memory_base, + memory_region_size(&pcms->hotplug_memory), + !addr ? NULL : &addr, + memory_region_size(mr), &local_err); + if (local_err) { + goto out; + } + + object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + goto out; + } + trace_mhp_pc_dimm_assigned_address(addr); + + slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err); + if (local_err) { + goto out; + } + + slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot, + machine->ram_slots, &local_err); + if (local_err) { + goto out; + } + object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err); + if (local_err) { + goto out; + } + trace_mhp_pc_dimm_assigned_slot(slot); + + if (!pcms->acpi_dev) { + error_setg(&local_err, + "memory hotplug is not enabled: missing acpi device"); + goto out; + } + + memory_region_add_subregion(&pcms->hotplug_memory, + addr - pcms->hotplug_memory_base, mr); + vmstate_register_ram(mr, dev); + + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); + hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); +out: + error_propagate(errp, local_err); +} + +static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + pc_dimm_plug(hotplug_dev, dev, errp); + } +} + +static HotplugHandler *pc_get_hotpug_handler(MachineState *machine, + DeviceState *dev) +{ + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + return HOTPLUG_HANDLER(machine); + } + + return pcmc->get_hotplug_handler ? + pcmc->get_hotplug_handler(machine, dev) : NULL; +} + +static void +pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + int64_t value = memory_region_size(&pcms->hotplug_memory); + + visit_type_int(v, &value, name, errp); +} + +static void pc_machine_initfn(Object *obj) +{ + object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int", + pc_machine_get_hotplug_memory_region_size, + NULL, NULL, NULL, NULL); +} + +static void pc_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + PCMachineClass *pcmc = PC_MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + + pcmc->get_hotplug_handler = mc->get_hotplug_handler; + mc->get_hotplug_handler = pc_get_hotpug_handler; + hc->plug = pc_machine_device_plug_cb; +} + +static const TypeInfo pc_machine_info = { + .name = TYPE_PC_MACHINE, + .parent = TYPE_MACHINE, + .abstract = true, + .instance_size = sizeof(PCMachineState), + .instance_init = pc_machine_initfn, + .class_size = sizeof(PCMachineClass), + .class_init = pc_machine_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + }, +}; + +static void pc_machine_register_types(void) +{ + type_register_static(&pc_machine_info); +} + +type_init(pc_machine_register_types) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index a48e26367d..3e7524b961 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -67,12 +67,14 @@ static bool smbios_legacy_mode; * pages in the host. */ static bool gigabyte_align = true; +static bool has_reserved_memory = true; /* PC hardware initialisation */ static void pc_init1(MachineState *machine, int pci_enabled, int kvmclock_enabled) { + PCMachineState *pc_machine = PC_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *system_io = get_system_io(); int i; @@ -143,6 +145,7 @@ static void pc_init1(MachineState *machine, guest_info->has_pci_info = has_pci_info; guest_info->isapc_ram_fw = !pci_enabled; + guest_info->has_reserved_memory = has_reserved_memory; if (smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); @@ -153,11 +156,9 @@ static void pc_init1(MachineState *machine, /* allocate ram and load rom/bios */ if (!xen_enabled()) { - fw_cfg = pc_memory_init(system_memory, - machine->kernel_filename, machine->kernel_cmdline, - machine->initrd_filename, - below_4g_mem_size, above_4g_mem_size, - rom_memory, &ram_memory, guest_info); + fw_cfg = pc_memory_init(machine, system_memory, + below_4g_mem_size, above_4g_mem_size, + rom_memory, &ram_memory, guest_info); } gsi_state = g_malloc0(sizeof(*gsi_state)); @@ -244,14 +245,23 @@ static void pc_init1(MachineState *machine, } if (pci_enabled && acpi_enabled) { + DeviceState *piix4_pm; I2CBus *smbus; smi_irq = qemu_allocate_irqs(pc_acpi_smi_interrupt, first_cpu, 1); /* TODO: Populate SPD eeprom data. */ smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, gsi[9], *smi_irq, - kvm_enabled(), fw_cfg); + kvm_enabled(), fw_cfg, &piix4_pm); smbus_eeprom_init(smbus, 8, NULL, 0); + + object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, + TYPE_HOTPLUG_HANDLER, + (Object **)&pc_machine->acpi_dev, + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); + object_property_set_link(OBJECT(machine), OBJECT(piix4_pm), + PC_MACHINE_ACPI_DEVICE_PROP, &error_abort); } if (pci_enabled) { @@ -267,6 +277,7 @@ static void pc_init_pci(MachineState *machine) static void pc_compat_2_0(MachineState *machine) { smbios_legacy_mode = true; + has_reserved_memory = false; } static void pc_compat_1_7(MachineState *machine) @@ -843,25 +854,25 @@ static QEMUMachine xenfv_machine = { static void pc_machine_init(void) { - qemu_register_machine(&pc_i440fx_machine_v2_1); - qemu_register_machine(&pc_i440fx_machine_v2_0); - qemu_register_machine(&pc_i440fx_machine_v1_7); - qemu_register_machine(&pc_i440fx_machine_v1_6); - qemu_register_machine(&pc_i440fx_machine_v1_5); - qemu_register_machine(&pc_i440fx_machine_v1_4); - qemu_register_machine(&pc_machine_v1_3); - qemu_register_machine(&pc_machine_v1_2); - qemu_register_machine(&pc_machine_v1_1); - qemu_register_machine(&pc_machine_v1_0); - qemu_register_machine(&pc_machine_v0_15); - qemu_register_machine(&pc_machine_v0_14); - qemu_register_machine(&pc_machine_v0_13); - qemu_register_machine(&pc_machine_v0_12); - qemu_register_machine(&pc_machine_v0_11); - qemu_register_machine(&pc_machine_v0_10); - qemu_register_machine(&isapc_machine); + qemu_register_pc_machine(&pc_i440fx_machine_v2_1); + qemu_register_pc_machine(&pc_i440fx_machine_v2_0); + qemu_register_pc_machine(&pc_i440fx_machine_v1_7); + qemu_register_pc_machine(&pc_i440fx_machine_v1_6); + qemu_register_pc_machine(&pc_i440fx_machine_v1_5); + qemu_register_pc_machine(&pc_i440fx_machine_v1_4); + qemu_register_pc_machine(&pc_machine_v1_3); + qemu_register_pc_machine(&pc_machine_v1_2); + qemu_register_pc_machine(&pc_machine_v1_1); + qemu_register_pc_machine(&pc_machine_v1_0); + qemu_register_pc_machine(&pc_machine_v0_15); + qemu_register_pc_machine(&pc_machine_v0_14); + qemu_register_pc_machine(&pc_machine_v0_13); + qemu_register_pc_machine(&pc_machine_v0_12); + qemu_register_pc_machine(&pc_machine_v0_11); + qemu_register_pc_machine(&pc_machine_v0_10); + qemu_register_pc_machine(&isapc_machine); #ifdef CONFIG_XEN - qemu_register_machine(&xenfv_machine); + qemu_register_pc_machine(&xenfv_machine); #endif } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index b3c02c163d..aa71332ee1 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -57,10 +57,12 @@ static bool smbios_legacy_mode; * pages in the host. */ static bool gigabyte_align = true; +static bool has_reserved_memory = true; /* PC hardware initialisation */ static void pc_q35_init(MachineState *machine) { + PCMachineState *pc_machine = PC_MACHINE(machine); ram_addr_t below_4g_mem_size, above_4g_mem_size; Q35PCIHost *q35_host; PCIHostState *phb; @@ -130,6 +132,7 @@ static void pc_q35_init(MachineState *machine) guest_info->has_pci_info = has_pci_info; guest_info->isapc_ram_fw = false; guest_info->has_acpi_build = has_acpi_build; + guest_info->has_reserved_memory = has_reserved_memory; if (smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); @@ -140,9 +143,7 @@ static void pc_q35_init(MachineState *machine) /* allocate ram and load rom/bios */ if (!xen_enabled()) { - pc_memory_init(get_system_memory(), - machine->kernel_filename, machine->kernel_cmdline, - machine->initrd_filename, + pc_memory_init(machine, get_system_memory(), below_4g_mem_size, above_4g_mem_size, rom_memory, &ram_memory, guest_info); } @@ -176,6 +177,15 @@ static void pc_q35_init(MachineState *machine) lpc = pci_create_simple_multifunction(host_bus, PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC), true, TYPE_ICH9_LPC_DEVICE); + + object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, + TYPE_HOTPLUG_HANDLER, + (Object **)&pc_machine->acpi_dev, + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); + object_property_set_link(OBJECT(machine), OBJECT(lpc), + PC_MACHINE_ACPI_DEVICE_PROP, &error_abort); + ich9_lpc = ICH9_LPC_DEVICE(lpc); ich9_lpc->pic = gsi; ich9_lpc->ioapic = gsi_state->ioapic_irq; @@ -245,6 +255,7 @@ static void pc_q35_init(MachineState *machine) static void pc_compat_2_0(MachineState *machine) { smbios_legacy_mode = true; + has_reserved_memory = false; } static void pc_compat_1_7(MachineState *machine) @@ -384,12 +395,12 @@ static QEMUMachine pc_q35_machine_v1_4 = { static void pc_q35_machine_init(void) { - qemu_register_machine(&pc_q35_machine_v2_1); - qemu_register_machine(&pc_q35_machine_v2_0); - qemu_register_machine(&pc_q35_machine_v1_7); - qemu_register_machine(&pc_q35_machine_v1_6); - qemu_register_machine(&pc_q35_machine_v1_5); - qemu_register_machine(&pc_q35_machine_v1_4); + qemu_register_pc_machine(&pc_q35_machine_v2_1); + qemu_register_pc_machine(&pc_q35_machine_v2_0); + qemu_register_pc_machine(&pc_q35_machine_v1_7); + qemu_register_pc_machine(&pc_q35_machine_v1_6); + qemu_register_pc_machine(&pc_q35_machine_v1_5); + qemu_register_pc_machine(&pc_q35_machine_v1_4); } machine_init(pc_q35_machine_init); diff --git a/hw/i386/q35-acpi-dsdt.dsl b/hw/i386/q35-acpi-dsdt.dsl index f4d2a2daee..8c3eae73bf 100644 --- a/hw/i386/q35-acpi-dsdt.dsl +++ b/hw/i386/q35-acpi-dsdt.dsl @@ -402,7 +402,7 @@ DefinitionBlock ( define_gsi_link(GSIH, 0, 0x17) } -#include "hw/acpi/cpu_hotplug_defs.h" +#include "hw/acpi/pc-hotplug.h" #define CPU_STATUS_BASE ICH9_CPU_HOTPLUG_IO_BASE #include "acpi-dsdt-cpu-hotplug.dsl" @@ -410,6 +410,7 @@ DefinitionBlock ( /**************************************************************** * General purpose events ****************************************************************/ + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj) Scope(\_GPE) { Name(_HID, "ACPI0006") @@ -422,7 +423,9 @@ DefinitionBlock ( // CPU hotplug event \_SB.PRSC() } - Method(_L03) { + Method(_E03) { + // Memory hotplug event + \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD() } Method(_L04) { } diff --git a/hw/i386/q35-acpi-dsdt.hex.generated b/hw/i386/q35-acpi-dsdt.hex.generated index 6b788c9be0..c9eb4ac6ad 100644 --- a/hw/i386/q35-acpi-dsdt.hex.generated +++ b/hw/i386/q35-acpi-dsdt.hex.generated @@ -3,12 +3,12 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0x53, 0x44, 0x54, -0xd2, +0xe5, 0x1c, 0x0, 0x0, 0x1, -0x13, +0xb7, 0x42, 0x58, 0x50, @@ -7234,8 +7234,8 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0xa, 0xb, 0x10, -0x4f, -0x8, +0x42, +0xa, 0x5f, 0x47, 0x50, @@ -7287,12 +7287,31 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0x53, 0x43, 0x14, -0x6, +0x19, 0x5f, -0x4c, +0x45, 0x30, 0x33, 0x0, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x53, +0x43, +0x4e, 0x14, 0x6, 0x5f, diff --git a/hw/i386/ssdt-mem.dsl b/hw/i386/ssdt-mem.dsl new file mode 100644 index 0000000000..8e17bd1f97 --- /dev/null +++ b/hw/i386/ssdt-mem.dsl @@ -0,0 +1,77 @@ +/* + * Memory hotplug ACPI DSDT static objects definitions + * + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2013-2014 Red Hat Inc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +/* This file is the basis for the ssdt_mem[] variable in src/acpi.c. + * It defines the contents of the memory device object. At + * runtime, a dynamically generated SSDT will contain one copy of this + * AML snippet for every possible memory device in the system. The + * objects will be placed in the \_SB_ namespace. + * + * In addition to the aml code generated from this file, the + * src/acpi.c file creates a MTFY method with an entry for each memdevice: + * Method(MTFY, 2) { + * If (LEqual(Arg0, 0x00)) { Notify(MP00, Arg1) } + * If (LEqual(Arg0, 0x01)) { Notify(MP01, Arg1) } + * ... + * } + */ +#include "hw/acpi/pc-hotplug.h" + +ACPI_EXTRACT_ALL_CODE ssdm_mem_aml + +DefinitionBlock ("ssdt-mem.aml", "SSDT", 0x02, "BXPC", "CSSDT", 0x1) +{ + + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD, MethodObj) + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD, MethodObj) + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD, MethodObj) + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD, MethodObj) + + Scope(\_SB) { +/* v------------------ DO NOT EDIT ------------------v */ + ACPI_EXTRACT_DEVICE_START ssdt_mem_start + ACPI_EXTRACT_DEVICE_END ssdt_mem_end + ACPI_EXTRACT_DEVICE_STRING ssdt_mem_name + Device(MPAA) { + ACPI_EXTRACT_NAME_STRING ssdt_mem_id + Name(_UID, "0xAA") +/* ^------------------ DO NOT EDIT ------------------^ + * Don't change the above without also updating the C code. + */ + Name(_HID, EISAID("PNP0C80")) + + Method(_CRS, 0) { + Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD(_UID)) + } + + Method(_STA, 0) { + Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD(_UID)) + } + + Method(_PXM, 0) { + Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD(_UID)) + } + + Method(_OST, 3) { + \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD(_UID, Arg0, Arg1, Arg2) + } + } + } +} diff --git a/hw/i386/ssdt-mem.hex.generated b/hw/i386/ssdt-mem.hex.generated new file mode 100644 index 0000000000..00bd34d269 --- /dev/null +++ b/hw/i386/ssdt-mem.hex.generated @@ -0,0 +1,213 @@ +static unsigned char ssdt_mem_id[] = { +0x35 +}; +static unsigned char ssdm_mem_aml[] = { +0x53, +0x53, +0x44, +0x54, +0xc7, +0x0, +0x0, +0x0, +0x2, +0x71, +0x42, +0x58, +0x50, +0x43, +0x0, +0x0, +0x43, +0x53, +0x53, +0x44, +0x54, +0x0, +0x0, +0x0, +0x1, +0x0, +0x0, +0x0, +0x49, +0x4e, +0x54, +0x4c, +0x15, +0x11, +0x13, +0x20, +0x10, +0x42, +0xa, +0x5c, +0x5f, +0x53, +0x42, +0x5f, +0x5b, +0x82, +0x49, +0x9, +0x4d, +0x50, +0x41, +0x41, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xd, +0x30, +0x78, +0x41, +0x41, +0x0, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0x80, +0x14, +0x1e, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x43, +0x52, +0x53, +0x5f, +0x55, +0x49, +0x44, +0x14, +0x1e, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x52, +0x53, +0x54, +0x5f, +0x55, +0x49, +0x44, +0x14, +0x1e, +0x5f, +0x50, +0x58, +0x4d, +0x0, +0xa4, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x50, +0x58, +0x4d, +0x5f, +0x55, +0x49, +0x44, +0x14, +0x20, +0x5f, +0x4f, +0x53, +0x54, +0x3, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x4f, +0x53, +0x54, +0x5f, +0x55, +0x49, +0x44, +0x68, +0x69, +0x6a +}; +static unsigned char ssdt_mem_start[] = { +0x2c +}; +static unsigned char ssdt_mem_end[] = { +0xc7 +}; +static unsigned char ssdt_mem_name[] = { +0x30 +}; diff --git a/hw/i386/ssdt-misc.dsl b/hw/i386/ssdt-misc.dsl index a4484b8176..d329b8ba57 100644 --- a/hw/i386/ssdt-misc.dsl +++ b/hw/i386/ssdt-misc.dsl @@ -12,6 +12,7 @@ * You should have received a copy of the GNU General Public License along * with this program; if not, see <http://www.gnu.org/licenses/>. */ +#include "hw/acpi/pc-hotplug.h" ACPI_EXTRACT_ALL_CODE ssdp_misc_aml @@ -116,4 +117,167 @@ DefinitionBlock ("ssdt-misc.aml", "SSDT", 0x01, "BXPC", "BXSSDTSUSP", 0x1) } } } + + External(MEMORY_SLOT_NOTIFY_METHOD, MethodObj) + Scope(\_SB.PCI0) { + Device(MEMORY_HOPTLUG_DEVICE) { + Name(_HID, "PNP0A06") + Name(_UID, "Memory hotplug resources") + + ACPI_EXTRACT_NAME_DWORD_CONST ssdt_mctrl_nr_slots + Name(MEMORY_SLOTS_NUMBER, 0x12345678) + + /* Memory hotplug IO registers */ + OperationRegion(MEMORY_HOTPLUG_IO_REGION, SystemIO, + ACPI_MEMORY_HOTPLUG_BASE, + ACPI_MEMORY_HOTPLUG_IO_LEN) + + Name(_CRS, ResourceTemplate() { + IO(Decode16, ACPI_MEMORY_HOTPLUG_BASE, ACPI_MEMORY_HOTPLUG_BASE, + 0, ACPI_MEMORY_HOTPLUG_IO_LEN, IO) + }) + + Method(_STA, 0) { + If (LEqual(MEMORY_SLOTS_NUMBER, Zero)) { + Return(0x0) + } + /* present, functioning, decoding, not shown in UI */ + Return(0xB) + } + + Field(MEMORY_HOTPLUG_IO_REGION, DWordAcc, NoLock, Preserve) { + MEMORY_SLOT_ADDR_LOW, 32, // read only + MEMORY_SLOT_ADDR_HIGH, 32, // read only + MEMORY_SLOT_SIZE_LOW, 32, // read only + MEMORY_SLOT_SIZE_HIGH, 32, // read only + MEMORY_SLOT_PROXIMITY, 32, // read only + } + Field(MEMORY_HOTPLUG_IO_REGION, ByteAcc, NoLock, Preserve) { + Offset(20), + MEMORY_SLOT_ENABLED, 1, // 1 if enabled, read only + MEMORY_SLOT_INSERT_EVENT, 1, // (read) 1 if has a insert event. (write) 1 to clear event + } + + Mutex (MEMORY_SLOT_LOCK, 0) + Field (MEMORY_HOTPLUG_IO_REGION, DWordAcc, NoLock, Preserve) { + MEMORY_SLOT_SLECTOR, 32, // DIMM selector, write only + MEMORY_SLOT_OST_EVENT, 32, // _OST event code, write only + MEMORY_SLOT_OST_STATUS, 32, // _OST status code, write only + } + + Method(MEMORY_SLOT_SCAN_METHOD, 0) { + If (LEqual(MEMORY_SLOTS_NUMBER, Zero)) { + Return(Zero) + } + + Store(Zero, Local0) // Mem devs iterrator + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + while (LLess(Local0, MEMORY_SLOTS_NUMBER)) { + Store(Local0, MEMORY_SLOT_SLECTOR) // select Local0 DIMM + If (LEqual(MEMORY_SLOT_INSERT_EVENT, One)) { // Memory device needs check + MEMORY_SLOT_NOTIFY_METHOD(Local0, 1) + Store(1, MEMORY_SLOT_INSERT_EVENT) + } + // TODO: handle memory eject request + Add(Local0, One, Local0) // goto next DIMM + } + Release(MEMORY_SLOT_LOCK) + Return(One) + } + + Method(MEMORY_SLOT_STATUS_METHOD, 1) { + Store(Zero, Local0) + + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + + If (LEqual(MEMORY_SLOT_ENABLED, One)) { + Store(0xF, Local0) + } + + Release(MEMORY_SLOT_LOCK) + Return(Local0) + } + + Method(MEMORY_SLOT_CRS_METHOD, 1, Serialized) { + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + + Name(MR64, ResourceTemplate() { + QWordMemory(ResourceProducer, PosDecode, MinFixed, MaxFixed, + Cacheable, ReadWrite, + 0x0000000000000000, // Address Space Granularity + 0x0000000000000000, // Address Range Minimum + 0xFFFFFFFFFFFFFFFE, // Address Range Maximum + 0x0000000000000000, // Address Translation Offset + 0xFFFFFFFFFFFFFFFF, // Address Length + ,, MW64, AddressRangeMemory, TypeStatic) + }) + + CreateDWordField(MR64, 14, MINL) + CreateDWordField(MR64, 18, MINH) + CreateDWordField(MR64, 38, LENL) + CreateDWordField(MR64, 42, LENH) + CreateDWordField(MR64, 22, MAXL) + CreateDWordField(MR64, 26, MAXH) + + Store(MEMORY_SLOT_ADDR_HIGH, MINH) + Store(MEMORY_SLOT_ADDR_LOW, MINL) + Store(MEMORY_SLOT_SIZE_HIGH, LENH) + Store(MEMORY_SLOT_SIZE_LOW, LENL) + + // 64-bit math: MAX = MIN + LEN - 1 + Add(MINL, LENL, MAXL) + Add(MINH, LENH, MAXH) + If (LLess(MAXL, MINL)) { + Add(MAXH, One, MAXH) + } + If (LLess(MAXL, One)) { + Subtract(MAXH, One, MAXH) + } + Subtract(MAXL, One, MAXL) + + If (LEqual(MAXH, Zero)){ + Name(MR32, ResourceTemplate() { + DWordMemory(ResourceProducer, PosDecode, MinFixed, MaxFixed, + Cacheable, ReadWrite, + 0x00000000, // Address Space Granularity + 0x00000000, // Address Range Minimum + 0xFFFFFFFE, // Address Range Maximum + 0x00000000, // Address Translation Offset + 0xFFFFFFFF, // Address Length + ,, MW32, AddressRangeMemory, TypeStatic) + }) + CreateDWordField(MR32, MW32._MIN, MIN) + CreateDWordField(MR32, MW32._MAX, MAX) + CreateDWordField(MR32, MW32._LEN, LEN) + Store(MINL, MIN) + Store(MAXL, MAX) + Store(LENL, LEN) + + Release(MEMORY_SLOT_LOCK) + Return(MR32) + } + + Release(MEMORY_SLOT_LOCK) + Return(MR64) + } + + Method(MEMORY_SLOT_PROXIMITY_METHOD, 1) { + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + Store(MEMORY_SLOT_PROXIMITY, Local0) + Release(MEMORY_SLOT_LOCK) + Return(Local0) + } + + Method(MEMORY_SLOT_OST_METHOD, 4) { + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + Store(Arg1, MEMORY_SLOT_OST_EVENT) + Store(Arg2, MEMORY_SLOT_OST_STATUS) + Release(MEMORY_SLOT_LOCK) + } + } // Device() + } // Scope() } diff --git a/hw/i386/ssdt-misc.hex.generated b/hw/i386/ssdt-misc.hex.generated index 55e3bd2aa6..ba4268a60b 100644 --- a/hw/i386/ssdt-misc.hex.generated +++ b/hw/i386/ssdt-misc.hex.generated @@ -4,6 +4,9 @@ static unsigned char acpi_pci64_length[] = { static unsigned char acpi_s4_pkg[] = { 0x8f }; +static unsigned short ssdt_mctrl_nr_slots[] = { +0x1aa +}; static unsigned char acpi_s3_name[] = { 0x7c }; @@ -18,12 +21,12 @@ static unsigned char ssdp_misc_aml[] = { 0x53, 0x44, 0x54, -0x62, -0x1, +0x7e, +0x4, 0x0, 0x0, 0x1, -0x76, +0x8b, 0x42, 0x58, 0x50, @@ -46,8 +49,8 @@ static unsigned char ssdp_misc_aml[] = { 0x4e, 0x54, 0x4c, -0x23, -0x8, +0x15, +0x11, 0x13, 0x20, 0x10, @@ -367,7 +370,803 @@ static unsigned char ssdp_misc_aml[] = { 0x49, 0x4f, 0x4d, -0x58 +0x58, +0x10, +0x4b, +0x31, +0x5c, +0x2e, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x5b, +0x82, +0x4d, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xd, +0x50, +0x4e, +0x50, +0x30, +0x41, +0x30, +0x36, +0x0, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xd, +0x4d, +0x65, +0x6d, +0x6f, +0x72, +0x79, +0x20, +0x68, +0x6f, +0x74, +0x70, +0x6c, +0x75, +0x67, +0x20, +0x72, +0x65, +0x73, +0x6f, +0x75, +0x72, +0x63, +0x65, +0x73, +0x0, +0x8, +0x4d, +0x44, +0x4e, +0x52, +0xc, +0x78, +0x56, +0x34, +0x12, +0x5b, +0x80, +0x48, +0x50, +0x4d, +0x52, +0x1, +0xb, +0x0, +0xa, +0xa, +0x18, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0xd, +0xa, +0xa, +0x47, +0x1, +0x0, +0xa, +0x0, +0xa, +0x0, +0x18, +0x79, +0x0, +0x14, +0x13, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa0, +0x9, +0x93, +0x4d, +0x44, +0x4e, +0x52, +0x0, +0xa4, +0x0, +0xa4, +0xa, +0xb, +0x5b, +0x81, +0x1f, +0x48, +0x50, +0x4d, +0x52, +0x3, +0x4d, +0x52, +0x42, +0x4c, +0x20, +0x4d, +0x52, +0x42, +0x48, +0x20, +0x4d, +0x52, +0x4c, +0x4c, +0x20, +0x4d, +0x52, +0x4c, +0x48, +0x20, +0x4d, +0x50, +0x58, +0x5f, +0x20, +0x5b, +0x81, +0x13, +0x48, +0x50, +0x4d, +0x52, +0x1, +0x0, +0x40, +0xa, +0x4d, +0x45, +0x53, +0x5f, +0x1, +0x4d, +0x49, +0x4e, +0x53, +0x1, +0x5b, +0x1, +0x4d, +0x4c, +0x43, +0x4b, +0x0, +0x5b, +0x81, +0x15, +0x48, +0x50, +0x4d, +0x52, +0x3, +0x4d, +0x53, +0x45, +0x4c, +0x20, +0x4d, +0x4f, +0x45, +0x56, +0x20, +0x4d, +0x4f, +0x53, +0x43, +0x20, +0x14, +0x4a, +0x4, +0x4d, +0x53, +0x43, +0x4e, +0x0, +0xa0, +0x9, +0x93, +0x4d, +0x44, +0x4e, +0x52, +0x0, +0xa4, +0x0, +0x70, +0x0, +0x60, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0xa2, +0x25, +0x95, +0x60, +0x4d, +0x44, +0x4e, +0x52, +0x70, +0x60, +0x4d, +0x53, +0x45, +0x4c, +0xa0, +0x13, +0x93, +0x4d, +0x49, +0x4e, +0x53, +0x1, +0x4d, +0x54, +0x46, +0x59, +0x60, +0x1, +0x70, +0x1, +0x4d, +0x49, +0x4e, +0x53, +0x72, +0x60, +0x1, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x1, +0x14, +0x2d, +0x4d, +0x52, +0x53, +0x54, +0x1, +0x70, +0x0, +0x60, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0xa0, +0xb, +0x93, +0x4d, +0x45, +0x53, +0x5f, +0x1, +0x70, +0xa, +0xf, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x60, +0x14, +0x41, +0x18, +0x4d, +0x43, +0x52, +0x53, +0x9, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x8, +0x4d, +0x52, +0x36, +0x34, +0x11, +0x33, +0xa, +0x30, +0x8a, +0x2b, +0x0, +0x0, +0xc, +0x3, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xfe, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0x79, +0x0, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0xe, +0x4d, +0x49, +0x4e, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x12, +0x4d, +0x49, +0x4e, +0x48, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x26, +0x4c, +0x45, +0x4e, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x2a, +0x4c, +0x45, +0x4e, +0x48, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x16, +0x4d, +0x41, +0x58, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x1a, +0x4d, +0x41, +0x58, +0x48, +0x70, +0x4d, +0x52, +0x42, +0x48, +0x4d, +0x49, +0x4e, +0x48, +0x70, +0x4d, +0x52, +0x42, +0x4c, +0x4d, +0x49, +0x4e, +0x4c, +0x70, +0x4d, +0x52, +0x4c, +0x48, +0x4c, +0x45, +0x4e, +0x48, +0x70, +0x4d, +0x52, +0x4c, +0x4c, +0x4c, +0x45, +0x4e, +0x4c, +0x72, +0x4d, +0x49, +0x4e, +0x4c, +0x4c, +0x45, +0x4e, +0x4c, +0x4d, +0x41, +0x58, +0x4c, +0x72, +0x4d, +0x49, +0x4e, +0x48, +0x4c, +0x45, +0x4e, +0x48, +0x4d, +0x41, +0x58, +0x48, +0xa0, +0x14, +0x95, +0x4d, +0x41, +0x58, +0x4c, +0x4d, +0x49, +0x4e, +0x4c, +0x72, +0x4d, +0x41, +0x58, +0x48, +0x1, +0x4d, +0x41, +0x58, +0x48, +0xa0, +0x11, +0x95, +0x4d, +0x41, +0x58, +0x4c, +0x1, +0x74, +0x4d, +0x41, +0x58, +0x48, +0x1, +0x4d, +0x41, +0x58, +0x48, +0x74, +0x4d, +0x41, +0x58, +0x4c, +0x1, +0x4d, +0x41, +0x58, +0x4c, +0xa0, +0x44, +0x7, +0x93, +0x4d, +0x41, +0x58, +0x48, +0x0, +0x8, +0x4d, +0x52, +0x33, +0x32, +0x11, +0x1f, +0xa, +0x1c, +0x87, +0x17, +0x0, +0x0, +0xc, +0x3, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xfe, +0xff, +0xff, +0xff, +0x0, +0x0, +0x0, +0x0, +0xff, +0xff, +0xff, +0xff, +0x79, +0x0, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0xa, +0x4d, +0x49, +0x4e, +0x5f, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0xe, +0x4d, +0x41, +0x58, +0x5f, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0x16, +0x4c, +0x45, +0x4e, +0x5f, +0x70, +0x4d, +0x49, +0x4e, +0x4c, +0x4d, +0x49, +0x4e, +0x5f, +0x70, +0x4d, +0x41, +0x58, +0x4c, +0x4d, +0x41, +0x58, +0x5f, +0x70, +0x4c, +0x45, +0x4e, +0x4c, +0x4c, +0x45, +0x4e, +0x5f, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x4d, +0x52, +0x33, +0x32, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x4d, +0x52, +0x36, +0x34, +0x14, +0x24, +0x4d, +0x50, +0x58, +0x4d, +0x1, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x70, +0x4d, +0x50, +0x58, +0x5f, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x60, +0x14, +0x28, +0x4d, +0x4f, +0x53, +0x54, +0x4, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x70, +0x69, +0x4d, +0x4f, +0x45, +0x56, +0x70, +0x6a, +0x4d, +0x4f, +0x53, +0x43, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b }; static unsigned char ssdt_isa_pest[] = { 0xd0 diff --git a/hw/i386/ssdt-pcihp.hex.generated b/hw/i386/ssdt-pcihp.hex.generated index b599b4663c..72ffa84800 100644 --- a/hw/i386/ssdt-pcihp.hex.generated +++ b/hw/i386/ssdt-pcihp.hex.generated @@ -32,7 +32,7 @@ static unsigned char ssdp_pcihp_aml[] = { 0x0, 0x0, 0x1, -0x6b, +0x70, 0x42, 0x58, 0x50, @@ -55,8 +55,8 @@ static unsigned char ssdp_pcihp_aml[] = { 0x4e, 0x54, 0x4c, -0x23, -0x8, +0x15, +0x11, 0x13, 0x20, 0x10, diff --git a/hw/i386/ssdt-proc.hex.generated b/hw/i386/ssdt-proc.hex.generated index 97e28d4820..4df0734c79 100644 --- a/hw/i386/ssdt-proc.hex.generated +++ b/hw/i386/ssdt-proc.hex.generated @@ -11,7 +11,7 @@ static unsigned char ssdp_proc_aml[] = { 0x0, 0x0, 0x1, -0x78, +0x7d, 0x42, 0x58, 0x50, @@ -34,8 +34,8 @@ static unsigned char ssdp_proc_aml[] = { 0x4e, 0x54, 0x4c, -0x23, -0x8, +0x15, +0x11, 0x13, 0x20, 0x5b, diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 97f69d6001..b846d81990 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -563,7 +563,14 @@ static void ich9_lpc_add_properties(ICH9LPCState *lpc) ich9_pm_add_properties(OBJECT(lpc), &lpc->pm, NULL); } -static int ich9_lpc_initfn(PCIDevice *d) +static void ich9_lpc_initfn(Object *obj) +{ + ICH9LPCState *lpc = ICH9_LPC_DEVICE(obj); + + ich9_lpc_add_properties(lpc); +} + +static int ich9_lpc_init(PCIDevice *d) { ICH9LPCState *lpc = ICH9_LPC_DEVICE(d); ISABus *isa_bus; @@ -589,10 +596,22 @@ static int ich9_lpc_initfn(PCIDevice *d) memory_region_add_subregion_overlap(pci_address_space_io(d), ICH9_RST_CNT_IOPORT, &lpc->rst_cnt_mem, 1); + return 0; +} - ich9_lpc_add_properties(lpc); +static void ich9_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev); - return 0; + ich9_pm_device_plug_cb(&lpc->pm, dev, errp); +} + +static void ich9_device_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + error_setg(errp, "acpi: device unplug request for not supported device" + " type: %s", object_get_typename(OBJECT(dev))); } static bool ich9_rst_cnt_needed(void *opaque) @@ -638,10 +657,12 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(klass); set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->reset = ich9_lpc_reset; - k->init = ich9_lpc_initfn; + k->init = ich9_lpc_init; dc->vmsd = &vmstate_ich9_lpc; k->config_write = ich9_lpc_config_write; dc->desc = "ICH9 LPC bridge"; @@ -654,13 +675,22 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) * pc_q35_init() */ dc->cannot_instantiate_with_device_add_yet = true; + hc->plug = ich9_device_plug_cb; + hc->unplug = ich9_device_unplug_cb; + adevc->ospm_status = ich9_pm_ospm_status; } static const TypeInfo ich9_lpc_info = { .name = TYPE_ICH9_LPC_DEVICE, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(struct ICH9LPCState), + .instance_init = ich9_lpc_initfn, .class_init = ich9_lpc_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { TYPE_ACPI_DEVICE_IF }, + { } + } }; static void ich9_lpc_register(void) diff --git a/hw/mem/Makefile.objs b/hw/mem/Makefile.objs new file mode 100644 index 0000000000..b000fb42bf --- /dev/null +++ b/hw/mem/Makefile.objs @@ -0,0 +1 @@ +common-obj-$(CONFIG_MEM_HOTPLUG) += pc-dimm.o diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c new file mode 100644 index 0000000000..ad176b700b --- /dev/null +++ b/hw/mem/pc-dimm.c @@ -0,0 +1,281 @@ +/* + * Dimm device for Memory Hotplug + * + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2014 Red Hat Inc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "hw/mem/pc-dimm.h" +#include "qemu/config-file.h" +#include "qapi/visitor.h" +#include "qemu/range.h" + +int qmp_pc_dimm_device_list(Object *obj, void *opaque) +{ + MemoryDeviceInfoList ***prev = opaque; + + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { + DeviceState *dev = DEVICE(obj); + + if (dev->realized) { + MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1); + MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); + PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); + DeviceClass *dc = DEVICE_GET_CLASS(obj); + PCDIMMDevice *dimm = PC_DIMM(obj); + + if (dev->id) { + di->has_id = true; + di->id = g_strdup(dev->id); + } + di->hotplugged = dev->hotplugged; + di->hotpluggable = dc->hotpluggable; + di->addr = dimm->addr; + di->slot = dimm->slot; + di->node = dimm->node; + di->size = object_property_get_int(OBJECT(dimm), PC_DIMM_SIZE_PROP, + NULL); + di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); + + info->dimm = di; + elem->value = info; + elem->next = NULL; + **prev = elem; + *prev = &elem->next; + } + } + + object_child_foreach(obj, qmp_pc_dimm_device_list, opaque); + return 0; +} + +static int pc_dimm_slot2bitmap(Object *obj, void *opaque) +{ + unsigned long *bitmap = opaque; + + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { + DeviceState *dev = DEVICE(obj); + if (dev->realized) { /* count only realized DIMMs */ + PCDIMMDevice *d = PC_DIMM(obj); + set_bit(d->slot, bitmap); + } + } + + object_child_foreach(obj, pc_dimm_slot2bitmap, opaque); + return 0; +} + +int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp) +{ + unsigned long *bitmap = bitmap_new(max_slots); + int slot = 0; + + object_child_foreach(qdev_get_machine(), pc_dimm_slot2bitmap, bitmap); + + /* check if requested slot is not occupied */ + if (hint) { + if (*hint >= max_slots) { + error_setg(errp, "invalid slot# %d, should be less than %d", + *hint, max_slots); + } else if (!test_bit(*hint, bitmap)) { + slot = *hint; + } else { + error_setg(errp, "slot %d is busy", *hint); + } + goto out; + } + + /* search for free slot */ + slot = find_first_zero_bit(bitmap, max_slots); + if (slot == max_slots) { + error_setg(errp, "no free slots available"); + } +out: + g_free(bitmap); + return slot; +} + +static gint pc_dimm_addr_sort(gconstpointer a, gconstpointer b) +{ + PCDIMMDevice *x = PC_DIMM(a); + PCDIMMDevice *y = PC_DIMM(b); + Int128 diff = int128_sub(int128_make64(x->addr), int128_make64(y->addr)); + + if (int128_lt(diff, int128_zero())) { + return -1; + } else if (int128_gt(diff, int128_zero())) { + return 1; + } + return 0; +} + +static int pc_dimm_built_list(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { + DeviceState *dev = DEVICE(obj); + if (dev->realized) { /* only realized DIMMs matter */ + *list = g_slist_insert_sorted(*list, dev, pc_dimm_addr_sort); + } + } + + object_child_foreach(obj, pc_dimm_built_list, opaque); + return 0; +} + +uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, + uint64_t address_space_size, + uint64_t *hint, uint64_t size, + Error **errp) +{ + GSList *list = NULL, *item; + uint64_t new_addr, ret = 0; + uint64_t address_space_end = address_space_start + address_space_size; + + assert(address_space_end > address_space_size); + object_child_foreach(qdev_get_machine(), pc_dimm_built_list, &list); + + if (hint) { + new_addr = *hint; + } else { + new_addr = address_space_start; + } + + /* find address range that will fit new DIMM */ + for (item = list; item; item = g_slist_next(item)) { + PCDIMMDevice *dimm = item->data; + uint64_t dimm_size = object_property_get_int(OBJECT(dimm), + PC_DIMM_SIZE_PROP, + errp); + if (errp && *errp) { + goto out; + } + + if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) { + if (hint) { + DeviceState *d = DEVICE(dimm); + error_setg(errp, "address range conflicts with '%s'", d->id); + goto out; + } + new_addr = dimm->addr + dimm_size; + } + } + ret = new_addr; + + if (new_addr < address_space_start) { + error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64 + "] at 0x%" PRIx64, new_addr, size, address_space_start); + } else if ((new_addr + size) > address_space_end) { + error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64 + "] beyond 0x%" PRIx64, new_addr, size, address_space_end); + } + +out: + g_slist_free(list); + return ret; +} + +static Property pc_dimm_properties[] = { + DEFINE_PROP_UINT64(PC_DIMM_ADDR_PROP, PCDIMMDevice, addr, 0), + DEFINE_PROP_UINT32(PC_DIMM_NODE_PROP, PCDIMMDevice, node, 0), + DEFINE_PROP_INT32(PC_DIMM_SLOT_PROP, PCDIMMDevice, slot, + PC_DIMM_UNASSIGNED_SLOT), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pc_dimm_get_size(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + int64_t value; + MemoryRegion *mr; + PCDIMMDevice *dimm = PC_DIMM(obj); + + mr = host_memory_backend_get_memory(dimm->hostmem, errp); + value = memory_region_size(mr); + + visit_type_int(v, &value, name, errp); +} + +static void pc_dimm_check_memdev_is_busy(Object *obj, const char *name, + Object *val, Error **errp) +{ + MemoryRegion *mr; + + mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), errp); + if (memory_region_is_mapped(mr)) { + char *path = object_get_canonical_path_component(val); + error_setg(errp, "can't use already busy memdev: %s", path); + g_free(path); + } else { + qdev_prop_allow_set_link_before_realize(obj, name, val, errp); + } +} + +static void pc_dimm_init(Object *obj) +{ + PCDIMMDevice *dimm = PC_DIMM(obj); + + object_property_add(obj, PC_DIMM_SIZE_PROP, "int", pc_dimm_get_size, + NULL, NULL, NULL, &error_abort); + object_property_add_link(obj, PC_DIMM_MEMDEV_PROP, TYPE_MEMORY_BACKEND, + (Object **)&dimm->hostmem, + pc_dimm_check_memdev_is_busy, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); +} + +static void pc_dimm_realize(DeviceState *dev, Error **errp) +{ + PCDIMMDevice *dimm = PC_DIMM(dev); + + if (!dimm->hostmem) { + error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set"); + return; + } +} + +static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm) +{ + return host_memory_backend_get_memory(dimm->hostmem, &error_abort); +} + +static void pc_dimm_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc); + + dc->realize = pc_dimm_realize; + dc->props = pc_dimm_properties; + + ddc->get_memory_region = pc_dimm_get_memory_region; +} + +static TypeInfo pc_dimm_info = { + .name = TYPE_PC_DIMM, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PCDIMMDevice), + .instance_init = pc_dimm_init, + .class_init = pc_dimm_class_init, + .class_size = sizeof(PCDIMMDeviceClass), +}; + +static void pc_dimm_register_types(void) +{ + type_register_static(&pc_dimm_info); +} + +type_init(pc_dimm_register_types) diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c index f4a7d47129..3c04342236 100644 --- a/hw/mips/mips_malta.c +++ b/hw/mips/mips_malta.c @@ -1104,7 +1104,7 @@ void mips_malta_init(MachineState *machine) pci_piix4_ide_init(pci_bus, hd, piix4_devfn + 1); pci_create_simple(pci_bus, piix4_devfn + 2, "piix4-usb-uhci"); smbus = piix4_pm_init(pci_bus, piix4_devfn + 3, 0x1100, - isa_get_irq(NULL, 9), NULL, 0, NULL); + isa_get_irq(NULL, 9), NULL, 0, NULL, NULL); smbus_eeprom_init(smbus, 8, smbus_eeprom_buf, smbus_eeprom_size); g_free(smbus_eeprom_buf); pit = pit_init(isa_bus, 0x40, 0, NULL); diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index a1de2f43a0..7ac7c21bdb 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -15,6 +15,7 @@ #include "net/net.h" #include "net/tap.h" +#include "net/vhost-user.h" #include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" @@ -27,7 +28,6 @@ #include <sys/socket.h> #include <linux/kvm.h> #include <fcntl.h> -#include <sys/ioctl.h> #include <linux/virtio_ring.h> #include <netpacket/packet.h> #include <net/ethernet.h> @@ -46,39 +46,76 @@ struct vhost_net { NetClientState *nc; }; -unsigned vhost_net_get_features(struct vhost_net *net, unsigned features) +/* Features supported by host kernel. */ +static const int kernel_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_NET_F_MRG_RXBUF, + VHOST_INVALID_FEATURE_BIT +}; + +/* Features supported by others. */ +const int user_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + + VIRTIO_F_ANY_LAYOUT, + VIRTIO_NET_F_CSUM, + VIRTIO_NET_F_GUEST_CSUM, + VIRTIO_NET_F_GSO, + VIRTIO_NET_F_GUEST_TSO4, + VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, + VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_HOST_TSO4, + VIRTIO_NET_F_HOST_TSO6, + VIRTIO_NET_F_HOST_ECN, + VIRTIO_NET_F_HOST_UFO, + VIRTIO_NET_F_MRG_RXBUF, + VIRTIO_NET_F_STATUS, + VIRTIO_NET_F_CTRL_VQ, + VIRTIO_NET_F_CTRL_RX, + VIRTIO_NET_F_CTRL_VLAN, + VIRTIO_NET_F_CTRL_RX_EXTRA, + VIRTIO_NET_F_CTRL_MAC_ADDR, + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, + + VIRTIO_NET_F_MQ, + + VHOST_INVALID_FEATURE_BIT +}; + +static const int *vhost_net_get_feature_bits(struct vhost_net *net) { - /* Clear features not supported by host kernel. */ - if (!(net->dev.features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY))) { - features &= ~(1 << VIRTIO_F_NOTIFY_ON_EMPTY); - } - if (!(net->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))) { - features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); - } - if (!(net->dev.features & (1 << VIRTIO_RING_F_EVENT_IDX))) { - features &= ~(1 << VIRTIO_RING_F_EVENT_IDX); - } - if (!(net->dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF))) { - features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF); + const int *feature_bits = 0; + + switch (net->nc->info->type) { + case NET_CLIENT_OPTIONS_KIND_TAP: + feature_bits = kernel_feature_bits; + break; + case NET_CLIENT_OPTIONS_KIND_VHOST_USER: + feature_bits = user_feature_bits; + break; + default: + error_report("Feature bits not defined for this type: %d", + net->nc->info->type); + break; } - return features; + + return feature_bits; +} + +unsigned vhost_net_get_features(struct vhost_net *net, unsigned features) +{ + return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net), + features); } void vhost_net_ack_features(struct vhost_net *net, unsigned features) { - net->dev.acked_features = net->dev.backend_features; - if (features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) { - net->dev.acked_features |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY); - } - if (features & (1 << VIRTIO_RING_F_INDIRECT_DESC)) { - net->dev.acked_features |= (1 << VIRTIO_RING_F_INDIRECT_DESC); - } - if (features & (1 << VIRTIO_RING_F_EVENT_IDX)) { - net->dev.acked_features |= (1 << VIRTIO_RING_F_EVENT_IDX); - } - if (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) { - net->dev.acked_features |= (1 << VIRTIO_NET_F_MRG_RXBUF); - } + vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features); } static int vhost_net_get_fd(NetClientState *backend) @@ -92,42 +129,52 @@ static int vhost_net_get_fd(NetClientState *backend) } } -struct vhost_net *vhost_net_init(NetClientState *backend, int devfd, - bool force) +struct vhost_net *vhost_net_init(VhostNetOptions *options) { int r; + bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; struct vhost_net *net = g_malloc(sizeof *net); - if (!backend) { - fprintf(stderr, "vhost-net requires backend to be setup\n"); + + if (!options->net_backend) { + fprintf(stderr, "vhost-net requires net backend to be setup\n"); goto fail; } - r = vhost_net_get_fd(backend); - if (r < 0) { - goto fail; + + if (backend_kernel) { + r = vhost_net_get_fd(options->net_backend); + if (r < 0) { + goto fail; + } + net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend) + ? 0 : (1 << VHOST_NET_F_VIRTIO_NET_HDR); + net->backend = r; + } else { + net->dev.backend_features = 0; + net->backend = -1; } - net->nc = backend; - net->dev.backend_features = qemu_has_vnet_hdr(backend) ? 0 : - (1 << VHOST_NET_F_VIRTIO_NET_HDR); - net->backend = r; + net->nc = options->net_backend; net->dev.nvqs = 2; net->dev.vqs = net->vqs; - r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force); + r = vhost_dev_init(&net->dev, options->opaque, + options->backend_type, options->force); if (r < 0) { goto fail; } - if (!qemu_has_vnet_hdr_len(backend, + if (!qemu_has_vnet_hdr_len(options->net_backend, sizeof(struct virtio_net_hdr_mrg_rxbuf))) { net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF); } - if (~net->dev.features & net->dev.backend_features) { - fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n", - (uint64_t)(~net->dev.features & net->dev.backend_features)); - vhost_dev_cleanup(&net->dev); - goto fail; + if (backend_kernel) { + if (~net->dev.features & net->dev.backend_features) { + fprintf(stderr, "vhost lacks feature mask %" PRIu64 + " for backend\n", + (uint64_t)(~net->dev.features & net->dev.backend_features)); + vhost_dev_cleanup(&net->dev); + goto fail; + } } - /* Set sane init value. Override when guest acks. */ vhost_net_ack_features(net, 0); return net; @@ -166,24 +213,37 @@ static int vhost_net_start_one(struct vhost_net *net, goto fail_start; } - net->nc->info->poll(net->nc, false); - qemu_set_fd_handler(net->backend, NULL, NULL, NULL); - file.fd = net->backend; - for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { - r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file); - if (r < 0) { - r = -errno; - goto fail; + if (net->nc->info->poll) { + net->nc->info->poll(net->nc, false); + } + + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { + qemu_set_fd_handler(net->backend, NULL, NULL, NULL); + file.fd = net->backend; + for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { + const VhostOps *vhost_ops = net->dev.vhost_ops; + r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, + &file); + if (r < 0) { + r = -errno; + goto fail; + } } } return 0; fail: file.fd = -1; - while (file.index-- > 0) { - int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file); - assert(r >= 0); + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { + while (file.index-- > 0) { + const VhostOps *vhost_ops = net->dev.vhost_ops; + int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, + &file); + assert(r >= 0); + } + } + if (net->nc->info->poll) { + net->nc->info->poll(net->nc, true); } - net->nc->info->poll(net->nc, true); vhost_dev_stop(&net->dev, dev); fail_start: vhost_dev_disable_notifiers(&net->dev, dev); @@ -200,11 +260,17 @@ static void vhost_net_stop_one(struct vhost_net *net, return; } - for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { - int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file); - assert(r >= 0); + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { + for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { + const VhostOps *vhost_ops = net->dev.vhost_ops; + int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, + &file); + assert(r >= 0); + } + } + if (net->nc->info->poll) { + net->nc->info->poll(net->nc, true); } - net->nc->info->poll(net->nc, true); vhost_dev_stop(&net->dev, dev); vhost_dev_disable_notifiers(&net->dev, dev); } @@ -224,7 +290,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, } for (i = 0; i < total_queues; i++) { - r = vhost_net_start_one(tap_get_vhost_net(ncs[i].peer), dev, i * 2); + r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev, i * 2); if (r < 0) { goto err; @@ -241,7 +307,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, err: while (--i >= 0) { - vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev); + vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); } return r; } @@ -262,7 +328,7 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, assert(r >= 0); for (i = 0; i < total_queues; i++) { - vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev); + vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); } } @@ -282,9 +348,30 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, { vhost_virtqueue_mask(&net->dev, dev, idx, mask); } + +VHostNetState *get_vhost_net(NetClientState *nc) +{ + VHostNetState *vhost_net = 0; + + if (!nc) { + return 0; + } + + switch (nc->info->type) { + case NET_CLIENT_OPTIONS_KIND_TAP: + vhost_net = tap_get_vhost_net(nc); + break; + case NET_CLIENT_OPTIONS_KIND_VHOST_USER: + vhost_net = vhost_user_get_vhost_net(nc); + break; + default: + break; + } + + return vhost_net; +} #else -struct vhost_net *vhost_net_init(NetClientState *backend, int devfd, - bool force) +struct vhost_net *vhost_net_init(VhostNetOptions *options) { error_report("vhost-net support is not compiled in"); return NULL; @@ -328,4 +415,9 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, int idx, bool mask) { } + +VHostNetState *get_vhost_net(NetClientState *nc) +{ + return 0; +} #endif diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 940a7cfe54..d8588f3808 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -99,20 +99,23 @@ static bool virtio_net_started(VirtIONet *n, uint8_t status) (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; } +static void virtio_net_announce_timer(void *opaque) +{ + VirtIONet *n = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + + n->announce_counter--; + n->status |= VIRTIO_NET_S_ANNOUNCE; + virtio_notify_config(vdev); +} + static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) { VirtIODevice *vdev = VIRTIO_DEVICE(n); NetClientState *nc = qemu_get_queue(n->nic); int queues = n->multiqueue ? n->max_queues : 1; - if (!nc->peer) { - return; - } - if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) { - return; - } - - if (!tap_get_vhost_net(nc->peer)) { + if (!get_vhost_net(nc->peer)) { return; } @@ -122,7 +125,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) } if (!n->vhost_started) { int r; - if (!vhost_net_query(tap_get_vhost_net(nc->peer), vdev)) { + if (!vhost_net_query(get_vhost_net(nc->peer), vdev)) { return; } n->vhost_started = 1; @@ -322,6 +325,9 @@ static void virtio_net_reset(VirtIODevice *vdev) n->nobcast = 0; /* multiqueue is disabled by default */ n->curr_queues = 1; + timer_del(n->announce_timer); + n->announce_counter = 0; + n->status &= ~VIRTIO_NET_S_ANNOUNCE; /* Flush any MAC and VLAN filter table state */ n->mac_table.in_use = 0; @@ -452,13 +458,10 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features) features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO); } - if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) { + if (!get_vhost_net(nc->peer)) { return features; } - if (!tap_get_vhost_net(nc->peer)) { - return features; - } - return vhost_net_get_features(tap_get_vhost_net(nc->peer), features); + return vhost_net_get_features(get_vhost_net(nc->peer), features); } static uint32_t virtio_net_bad_features(VirtIODevice *vdev) @@ -522,13 +525,10 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features) for (i = 0; i < n->max_queues; i++) { NetClientState *nc = qemu_get_subqueue(n->nic, i); - if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) { - continue; - } - if (!tap_get_vhost_net(nc->peer)) { + if (!get_vhost_net(nc->peer)) { continue; } - vhost_net_ack_features(tap_get_vhost_net(nc->peer), features); + vhost_net_ack_features(get_vhost_net(nc->peer), features); } if ((1 << VIRTIO_NET_F_CTRL_VLAN) & features) { @@ -731,6 +731,23 @@ static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, return VIRTIO_NET_OK; } +static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, + struct iovec *iov, unsigned int iov_cnt) +{ + if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && + n->status & VIRTIO_NET_S_ANNOUNCE) { + n->status &= ~VIRTIO_NET_S_ANNOUNCE; + if (n->announce_counter) { + timer_mod(n->announce_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + self_announce_delay(n->announce_counter)); + } + return VIRTIO_NET_OK; + } else { + return VIRTIO_NET_ERR; + } +} + static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, struct iovec *iov, unsigned int iov_cnt) { @@ -794,6 +811,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); + } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { + status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt); } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { @@ -1451,6 +1470,12 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) qemu_get_subqueue(n->nic, i)->link_down = link_down; } + if (vdev->guest_features & (0x1 << VIRTIO_NET_F_GUEST_ANNOUNCE) && + vdev->guest_features & (0x1 << VIRTIO_NET_F_CTRL_VQ)) { + n->announce_counter = SELF_ANNOUNCE_ROUNDS; + timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL)); + } + return 0; } @@ -1476,7 +1501,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) VirtIONet *n = VIRTIO_NET(vdev); NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); assert(n->vhost_started); - return vhost_net_virtqueue_pending(tap_get_vhost_net(nc->peer), idx); + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); } static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, @@ -1485,7 +1510,7 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, VirtIONet *n = VIRTIO_NET(vdev); NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); assert(n->vhost_started); - vhost_net_virtqueue_mask(tap_get_vhost_net(nc->peer), + vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); } @@ -1509,18 +1534,9 @@ void virtio_net_set_netclient_name(VirtIONet *n, const char *name, */ assert(type != NULL); - if (n->netclient_name) { - g_free(n->netclient_name); - n->netclient_name = NULL; - } - if (n->netclient_type) { - g_free(n->netclient_type); - n->netclient_type = NULL; - } - - if (name != NULL) { - n->netclient_name = g_strdup(name); - } + g_free(n->netclient_name); + g_free(n->netclient_type); + n->netclient_name = g_strdup(name); n->netclient_type = g_strdup(type); } @@ -1562,6 +1578,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); n->status = VIRTIO_NET_S_LINK_UP; + n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + virtio_net_announce_timer, n); if (n->netclient_type) { /* @@ -1616,14 +1634,10 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) unregister_savevm(dev, "virtio-net", n); - if (n->netclient_name) { - g_free(n->netclient_name); - n->netclient_name = NULL; - } - if (n->netclient_type) { - g_free(n->netclient_type); - n->netclient_type = NULL; - } + g_free(n->netclient_name); + n->netclient_name = NULL; + g_free(n->netclient_type); + n->netclient_type = NULL; g_free(n->mac_table.macs); g_free(n->vlans); @@ -1642,6 +1656,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) } } + timer_del(n->announce_timer); + timer_free(n->announce_timer); g_free(n->vqs); qemu_del_nic(n->nic); virtio_cleanup(vdev); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e06321cf15..82f183f173 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -673,8 +673,8 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) int i, off; /* memory node(s) */ - if (nb_numa_nodes > 1 && node_mem[0] < ram_size) { - node0_size = node_mem[0]; + if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) { + node0_size = numa_info[0].node_mem; } else { node0_size = ram_size; } @@ -712,7 +712,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) if (mem_start >= ram_size) { node_size = 0; } else { - node_size = node_mem[i]; + node_size = numa_info[i].node_mem; if (node_size > ram_size - mem_start) { node_size = ram_size - mem_start; } @@ -857,7 +857,8 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) /* Update the RMA size if necessary */ if (spapr->vrma_adjust) { - hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size; + hwaddr node0_size = (nb_numa_nodes > 1) ? + numa_info[0].node_mem : ram_size; spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift); } } @@ -1289,7 +1290,7 @@ static void ppc_spapr_init(MachineState *machine) MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ram = g_new(MemoryRegion, 1); hwaddr rma_alloc_size; - hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size; + hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size; uint32_t initrd_base = 0; long kernel_size = 0, initrd_size = 0; long load_limit, rtas_limit, fw_size; diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index cc8df4ef03..ddfe76aed0 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -24,15 +24,25 @@ #include "hw/virtio/virtio-scsi.h" #include "hw/virtio/virtio-bus.h" +/* Features supported by host kernel. */ +static const int kernel_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_SCSI_F_HOTPLUG, + VHOST_INVALID_FEATURE_BIT +}; + static int vhost_scsi_set_endpoint(VHostSCSI *s) { VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + const VhostOps *vhost_ops = s->dev.vhost_ops; struct vhost_scsi_target backend; int ret; memset(&backend, 0, sizeof(backend)); pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn); - ret = ioctl(s->dev.control, VHOST_SCSI_SET_ENDPOINT, &backend); + ret = vhost_ops->vhost_call(&s->dev, VHOST_SCSI_SET_ENDPOINT, &backend); if (ret < 0) { return -errno; } @@ -43,10 +53,11 @@ static void vhost_scsi_clear_endpoint(VHostSCSI *s) { VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); struct vhost_scsi_target backend; + const VhostOps *vhost_ops = s->dev.vhost_ops; memset(&backend, 0, sizeof(backend)); pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn); - ioctl(s->dev.control, VHOST_SCSI_CLEAR_ENDPOINT, &backend); + vhost_ops->vhost_call(&s->dev, VHOST_SCSI_CLEAR_ENDPOINT, &backend); } static int vhost_scsi_start(VHostSCSI *s) @@ -55,13 +66,15 @@ static int vhost_scsi_start(VHostSCSI *s) VirtIODevice *vdev = VIRTIO_DEVICE(s); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + const VhostOps *vhost_ops = s->dev.vhost_ops; if (!k->set_guest_notifiers) { error_report("binding does not support guest notifiers"); return -ENOSYS; } - ret = ioctl(s->dev.control, VHOST_SCSI_GET_ABI_VERSION, &abi_version); + ret = vhost_ops->vhost_call(&s->dev, + VHOST_SCSI_GET_ABI_VERSION, &abi_version); if (ret < 0) { return -errno; } @@ -141,21 +154,7 @@ static uint32_t vhost_scsi_get_features(VirtIODevice *vdev, { VHostSCSI *s = VHOST_SCSI(vdev); - /* Clear features not supported by host kernel. */ - if (!(s->dev.features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY))) { - features &= ~(1 << VIRTIO_F_NOTIFY_ON_EMPTY); - } - if (!(s->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))) { - features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); - } - if (!(s->dev.features & (1 << VIRTIO_RING_F_EVENT_IDX))) { - features &= ~(1 << VIRTIO_RING_F_EVENT_IDX); - } - if (!(s->dev.features & (1 << VIRTIO_SCSI_F_HOTPLUG))) { - features &= ~(1 << VIRTIO_SCSI_F_HOTPLUG); - } - - return features; + return vhost_get_features(&s->dev, kernel_feature_bits, features); } static void vhost_scsi_set_config(VirtIODevice *vdev, @@ -219,6 +218,13 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) error_setg(errp, "vhost-scsi: unable to parse vhostfd"); return; } + } else { + vhostfd = open("/dev/vhost-scsi", O_RDWR); + if (vhostfd < 0) { + error_setg(errp, "vhost-scsi: open vhost char device failed: %s", + strerror(errno)); + return; + } } virtio_scsi_common_realize(dev, &err, vhost_dummy_handle_output, @@ -233,7 +239,8 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); s->dev.vq_index = 0; - ret = vhost_dev_init(&s->dev, vhostfd, "/dev/vhost-scsi", true); + ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)vhostfd, + VHOST_BACKEND_TYPE_KERNEL, true); if (ret < 0) { error_setg(errp, "vhost-scsi: vhost initialization failed: %s", strerror(-ret)); diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index 1ba53d9cc3..ec9e855bc1 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -5,4 +5,4 @@ common-obj-y += virtio-mmio.o common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ obj-y += virtio.o virtio-balloon.o -obj-$(CONFIG_LINUX) += vhost.o +obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c new file mode 100644 index 0000000000..35316c40d9 --- /dev/null +++ b/hw/virtio/vhost-backend.c @@ -0,0 +1,71 @@ +/* + * vhost-backend + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-backend.h" +#include "qemu/error-report.h" + +#include <sys/ioctl.h> + +extern const VhostOps user_ops; + +static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request, + void *arg) +{ + int fd = (uintptr_t) dev->opaque; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL); + + return ioctl(fd, request, arg); +} + +static int vhost_kernel_init(struct vhost_dev *dev, void *opaque) +{ + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL); + + dev->opaque = opaque; + + return 0; +} + +static int vhost_kernel_cleanup(struct vhost_dev *dev) +{ + int fd = (uintptr_t) dev->opaque; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL); + + return close(fd); +} + +static const VhostOps kernel_ops = { + .backend_type = VHOST_BACKEND_TYPE_KERNEL, + .vhost_call = vhost_kernel_call, + .vhost_backend_init = vhost_kernel_init, + .vhost_backend_cleanup = vhost_kernel_cleanup +}; + +int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) +{ + int r = 0; + + switch (backend_type) { + case VHOST_BACKEND_TYPE_KERNEL: + dev->vhost_ops = &kernel_ops; + break; + case VHOST_BACKEND_TYPE_USER: + dev->vhost_ops = &user_ops; + break; + default: + error_report("Unknown vhost backend type\n"); + r = -1; + } + + return r; +} diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c new file mode 100644 index 0000000000..0df6a936a0 --- /dev/null +++ b/hw/virtio/vhost-user.c @@ -0,0 +1,342 @@ +/* + * vhost-user + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-backend.h" +#include "sysemu/char.h" +#include "sysemu/kvm.h" +#include "qemu/error-report.h" +#include "qemu/sockets.h" + +#include <fcntl.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <linux/vhost.h> + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_MAX +} VhostUserRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserMsg { + VhostUserRequest request; + +#define VHOST_USER_VERSION_MASK (0x3) +#define VHOST_USER_REPLY_MASK (0x1<<2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK (0xff) +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + }; +} QEMU_PACKED VhostUserMsg; + +static VhostUserMsg m __attribute__ ((unused)); +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \ + + sizeof(m.flags) \ + + sizeof(m.size)) + +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION (0x1) + +static bool ioeventfd_enabled(void) +{ + return kvm_enabled() && kvm_eventfds_enabled(); +} + +static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = { + -1, /* VHOST_USER_NONE */ + VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */ + VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */ + VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */ + VHOST_RESET_OWNER, /* VHOST_USER_RESET_OWNER */ + VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */ + VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */ + VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */ + VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */ + VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */ + VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */ + VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ + VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ + VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ + VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ +}; + +static VhostUserRequest vhost_user_request_translate(unsigned long int request) +{ + VhostUserRequest idx; + + for (idx = 0; idx < VHOST_USER_MAX; idx++) { + if (ioctl_to_vhost_user_request[idx] == request) { + break; + } + } + + return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx; +} + +static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) +{ + CharDriverState *chr = dev->opaque; + uint8_t *p = (uint8_t *) msg; + int r, size = VHOST_USER_HDR_SIZE; + + r = qemu_chr_fe_read_all(chr, p, size); + if (r != size) { + error_report("Failed to read msg header. Read %d instead of %d.\n", r, + size); + goto fail; + } + + /* validate received flags */ + if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { + error_report("Failed to read msg header." + " Flags 0x%x instead of 0x%x.\n", msg->flags, + VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); + goto fail; + } + + /* validate message size is sane */ + if (msg->size > VHOST_USER_PAYLOAD_SIZE) { + error_report("Failed to read msg header." + " Size %d exceeds the maximum %zu.\n", msg->size, + VHOST_USER_PAYLOAD_SIZE); + goto fail; + } + + if (msg->size) { + p += VHOST_USER_HDR_SIZE; + size = msg->size; + r = qemu_chr_fe_read_all(chr, p, size); + if (r != size) { + error_report("Failed to read msg payload." + " Read %d instead of %d.\n", r, msg->size); + goto fail; + } + } + + return 0; + +fail: + return -1; +} + +static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, + int *fds, int fd_num) +{ + CharDriverState *chr = dev->opaque; + int size = VHOST_USER_HDR_SIZE + msg->size; + + if (fd_num) { + qemu_chr_fe_set_msgfds(chr, fds, fd_num); + } + + return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? + 0 : -1; +} + +static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, + void *arg) +{ + VhostUserMsg msg; + VhostUserRequest msg_request; + RAMBlock *block = 0; + struct vhost_vring_file *file = 0; + int need_reply = 0; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + size_t fd_num = 0; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + + msg_request = vhost_user_request_translate(request); + msg.request = msg_request; + msg.flags = VHOST_USER_VERSION; + msg.size = 0; + + switch (request) { + case VHOST_GET_FEATURES: + need_reply = 1; + break; + + case VHOST_SET_FEATURES: + case VHOST_SET_LOG_BASE: + msg.u64 = *((__u64 *) arg); + msg.size = sizeof(m.u64); + break; + + case VHOST_SET_OWNER: + case VHOST_RESET_OWNER: + break; + + case VHOST_SET_MEM_TABLE: + QTAILQ_FOREACH(block, &ram_list.blocks, next) + { + if (block->fd > 0) { + msg.memory.regions[fd_num].userspace_addr = + (uintptr_t) block->host; + msg.memory.regions[fd_num].memory_size = block->length; + msg.memory.regions[fd_num].guest_phys_addr = block->offset; + fds[fd_num++] = block->fd; + } + } + + msg.memory.nregions = fd_num; + + if (!fd_num) { + error_report("Failed initializing vhost-user memory map\n" + "consider using -object memory-backend-file share=on\n"); + return -1; + } + + msg.size = sizeof(m.memory.nregions); + msg.size += sizeof(m.memory.padding); + msg.size += fd_num * sizeof(VhostUserMemoryRegion); + + break; + + case VHOST_SET_LOG_FD: + fds[fd_num++] = *((int *) arg); + break; + + case VHOST_SET_VRING_NUM: + case VHOST_SET_VRING_BASE: + memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); + msg.size = sizeof(m.state); + break; + + case VHOST_GET_VRING_BASE: + memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); + msg.size = sizeof(m.state); + need_reply = 1; + break; + + case VHOST_SET_VRING_ADDR: + memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr)); + msg.size = sizeof(m.addr); + break; + + case VHOST_SET_VRING_KICK: + case VHOST_SET_VRING_CALL: + case VHOST_SET_VRING_ERR: + file = arg; + msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK; + msg.size = sizeof(m.u64); + if (ioeventfd_enabled() && file->fd > 0) { + fds[fd_num++] = file->fd; + } else { + msg.u64 |= VHOST_USER_VRING_NOFD_MASK; + } + break; + default: + error_report("vhost-user trying to send unhandled ioctl\n"); + return -1; + break; + } + + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return 0; + } + + if (need_reply) { + if (vhost_user_read(dev, &msg) < 0) { + return 0; + } + + if (msg_request != msg.request) { + error_report("Received unexpected msg type." + " Expected %d received %d\n", msg_request, msg.request); + return -1; + } + + switch (msg_request) { + case VHOST_USER_GET_FEATURES: + if (msg.size != sizeof(m.u64)) { + error_report("Received bad msg size.\n"); + return -1; + } + *((__u64 *) arg) = msg.u64; + break; + case VHOST_USER_GET_VRING_BASE: + if (msg.size != sizeof(m.state)) { + error_report("Received bad msg size.\n"); + return -1; + } + memcpy(arg, &msg.state, sizeof(struct vhost_vring_state)); + break; + default: + error_report("Received unexpected msg type.\n"); + return -1; + break; + } + } + + return 0; +} + +static int vhost_user_init(struct vhost_dev *dev, void *opaque) +{ + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + + dev->opaque = opaque; + + return 0; +} + +static int vhost_user_cleanup(struct vhost_dev *dev) +{ + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + + dev->opaque = 0; + + return 0; +} + +const VhostOps user_ops = { + .backend_type = VHOST_BACKEND_TYPE_USER, + .vhost_call = vhost_user_call, + .vhost_backend_init = vhost_user_init, + .vhost_backend_cleanup = vhost_user_cleanup + }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index f62cfaf38e..c1b1aad6cf 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -13,7 +13,6 @@ * GNU GPL, version 2 or (at your option) any later version. */ -#include <sys/ioctl.h> #include "hw/virtio/vhost.h" #include "hw/hw.h" #include "qemu/atomic.h" @@ -289,15 +288,13 @@ static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size) log = g_malloc0(size * sizeof *log); log_base = (uint64_t)(unsigned long)log; - r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base); assert(r >= 0); /* Sync only the range covered by the old log */ if (dev->log_size) { vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); } - if (dev->log) { - g_free(dev->log); - } + g_free(dev->log); dev->log = log; dev->log_size = size; } @@ -458,7 +455,7 @@ static void vhost_commit(MemoryListener *listener) } if (!dev->log_enabled) { - r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem); assert(r >= 0); dev->memory_changed = false; return; @@ -471,7 +468,7 @@ static void vhost_commit(MemoryListener *listener) if (dev->log_size < log_size) { vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER); } - r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem); assert(r >= 0); /* To log less, can only decrease log size after table update. */ if (dev->log_size > log_size + VHOST_LOG_BUFFER) { @@ -539,7 +536,7 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, .log_guest_addr = vq->used_phys, .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0, }; - int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr); + int r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ADDR, &addr); if (r < 0) { return -errno; } @@ -553,7 +550,7 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log) if (enable_log) { features |= 0x1 << VHOST_F_LOG_ALL; } - r = ioctl(dev->control, VHOST_SET_FEATURES, &features); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_FEATURES, &features); return r < 0 ? -errno : 0; } @@ -601,9 +598,7 @@ static int vhost_migration_log(MemoryListener *listener, int enable) if (r < 0) { return r; } - if (dev->log) { - g_free(dev->log); - } + g_free(dev->log); dev->log = NULL; dev->log_size = 0; } else { @@ -668,13 +663,13 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); vq->num = state.num = virtio_queue_get_num(vdev, idx); - r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_NUM, &state); if (r) { return -errno; } state.num = virtio_queue_get_last_avail_idx(vdev, idx); - r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_BASE, &state); if (r) { return -errno; } @@ -716,7 +711,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, } file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq)); - r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_KICK, &file); if (r) { r = -errno; goto fail_kick; @@ -754,7 +749,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, }; int r; assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); - r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state); + r = dev->vhost_ops->vhost_call(dev, VHOST_GET_VRING_BASE, &state); if (r < 0) { fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r); fflush(stderr); @@ -796,7 +791,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, } file.fd = event_notifier_get_fd(&vq->masked_notifier); - r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_CALL, &file); if (r) { r = -errno; goto fail_call; @@ -812,25 +807,26 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) event_notifier_cleanup(&vq->masked_notifier); } -int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath, - bool force) +int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + VhostBackendType backend_type, bool force) { uint64_t features; int i, r; - if (devfd >= 0) { - hdev->control = devfd; - } else { - hdev->control = open(devpath, O_RDWR); - if (hdev->control < 0) { - return -errno; - } + + if (vhost_set_backend_type(hdev, backend_type) < 0) { + return -1; } - r = ioctl(hdev->control, VHOST_SET_OWNER, NULL); + + if (hdev->vhost_ops->vhost_backend_init(hdev, opaque) < 0) { + return -errno; + } + + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL); if (r < 0) { goto fail; } - r = ioctl(hdev->control, VHOST_GET_FEATURES, &features); + r = hdev->vhost_ops->vhost_call(hdev, VHOST_GET_FEATURES, &features); if (r < 0) { goto fail; } @@ -875,7 +871,7 @@ fail_vq: } fail: r = -errno; - close(hdev->control); + hdev->vhost_ops->vhost_backend_cleanup(hdev); return r; } @@ -888,7 +884,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) memory_listener_unregister(&hdev->memory_listener); g_free(hdev->mem); g_free(hdev->mem_sections); - close(hdev->control); + hdev->vhost_ops->vhost_backend_cleanup(hdev); } bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev) @@ -990,10 +986,37 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, } else { file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq)); } - r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file); + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_VRING_CALL, &file); assert(r >= 0); } +unsigned vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, + unsigned features) +{ + const int *bit = feature_bits; + while (*bit != VHOST_INVALID_FEATURE_BIT) { + unsigned bit_mask = (1 << *bit); + if (!(hdev->features & bit_mask)) { + features &= ~bit_mask; + } + bit++; + } + return features; +} + +void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, + unsigned features) +{ + const int *bit = feature_bits; + while (*bit != VHOST_INVALID_FEATURE_BIT) { + unsigned bit_mask = (1 << *bit); + if (features & bit_mask) { + hdev->acked_features |= bit_mask; + } + bit++; + } +} + /* Host notifiers must be enabled at this point. */ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) { @@ -1005,7 +1028,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) if (r < 0) { goto fail_features; } - r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem); + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem); if (r < 0) { r = -errno; goto fail_mem; @@ -1024,8 +1047,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) hdev->log_size = vhost_get_log_size(hdev); hdev->log = hdev->log_size ? g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL; - r = ioctl(hdev->control, VHOST_SET_LOG_BASE, - (uint64_t)(unsigned long)hdev->log); + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, hdev->log); if (r < 0) { r = -errno; goto fail_log; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index a07ae8ad91..a3082d569d 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1164,14 +1164,8 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) { - if (vdev->bus_name) { - g_free(vdev->bus_name); - vdev->bus_name = NULL; - } - - if (bus_name) { - vdev->bus_name = g_strdup(bus_name); - } + g_free(vdev->bus_name); + vdev->bus_name = g_strdup(bus_name); } static void virtio_device_realize(DeviceState *dev, Error **errp) @@ -1206,10 +1200,8 @@ static void virtio_device_unrealize(DeviceState *dev, Error **errp) } } - if (vdev->bus_name) { - g_free(vdev->bus_name); - vdev->bus_name = NULL; - } + g_free(vdev->bus_name); + vdev->bus_name = NULL; } static void virtio_device_class_init(ObjectClass *klass, void *data) diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index e8363d7248..f91581fc65 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -297,11 +297,6 @@ CPUArchState *cpu_copy(CPUArchState *env); /* memory API */ -extern ram_addr_t ram_size; - -/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ -#define RAM_PREALLOC_MASK (1 << 0) - typedef struct RAMBlock { struct MemoryRegion *mr; uint8_t *host; @@ -327,9 +322,6 @@ typedef struct RAMList { } RAMList; extern RAMList ram_list; -extern const char *mem_path; -extern int mem_prealloc; - /* Flags stored in the low bits of the TLB virtual address. These are defined so that fast path ram access is all zeros. */ /* Zero if TLB entry is valid. */ diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 89ec6404cf..e3ec4c8e0c 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -45,6 +45,8 @@ typedef uintptr_t ram_addr_t; # define RAM_ADDR_FMT "%" PRIxPTR #endif +extern ram_addr_t ram_size; + /* memory API */ typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value); diff --git a/include/exec/memory.h b/include/exec/memory.h index 549ae734e6..3d778d70f0 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -31,6 +31,7 @@ #include "qemu/queue.h" #include "qemu/int128.h" #include "qemu/notify.h" +#include "qapi/error.h" #define MAX_PHYS_ADDR_SPACE_BITS 62 #define MAX_PHYS_ADDR (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1) @@ -311,6 +312,28 @@ void memory_region_init_ram(MemoryRegion *mr, const char *name, uint64_t size); +#ifdef __linux__ +/** + * memory_region_init_ram_from_file: Initialize RAM memory region with a + * mmap-ed backend. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: the name of the region. + * @size: size of the region. + * @share: %true if memory must be mmaped with the MAP_SHARED flag + * @path: the path in which to allocate the RAM. + * @errp: pointer to Error*, to store an error if it happens. + */ +void memory_region_init_ram_from_file(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + bool share, + const char *path, + Error **errp); +#endif + /** * memory_region_init_ram_ptr: Initialize RAM memory region from a * user-provided pointer. Accesses into the @@ -513,6 +536,16 @@ bool memory_region_is_logging(MemoryRegion *mr); bool memory_region_is_rom(MemoryRegion *mr); /** + * memory_region_get_fd: Get a file descriptor backing a RAM memory region. + * + * Returns a file descriptor backing a file-based RAM memory region, + * or -1 if the region is not a file-based RAM memory region. + * + * @mr: the RAM or alias memory region being queried. + */ +int memory_region_get_fd(MemoryRegion *mr); + +/** * memory_region_get_ram_ptr: Get a pointer into a RAM memory region. * * Returns a host pointer to a RAM memory region (created with @@ -848,6 +881,14 @@ void memory_region_set_alias_offset(MemoryRegion *mr, bool memory_region_present(MemoryRegion *container, hwaddr addr); /** + * memory_region_is_mapped: returns true if #MemoryRegion is mapped + * into any address space. + * + * @mr: a #MemoryRegion which should be checked if it's mapped + */ +bool memory_region_is_mapped(MemoryRegion *mr); + +/** * memory_region_find: translate an address/size relative to a * MemoryRegion into a #MemoryRegionSection. * diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index b94de02ea7..55ca67681f 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -22,9 +22,13 @@ #ifndef CONFIG_USER_ONLY #include "hw/xen/xen.h" +ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, + bool share, const char *mem_path, + Error **errp); ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr); ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr); +int qemu_get_ram_fd(ram_addr_t addr); void *qemu_get_ram_ptr(ram_addr_t addr); void qemu_ram_free(ram_addr_t addr); void qemu_ram_free_from_ptr(ram_addr_t addr); diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h index a9fae9d5c5..1f678b4bf2 100644 --- a/include/hw/acpi/acpi.h +++ b/include/hw/acpi/acpi.h @@ -26,6 +26,11 @@ #include "exec/memory.h" #include "hw/irq.h" +/* + * current device naming scheme supports up to 256 memory devices + */ +#define ACPI_MAX_RAM_SLOTS 256 + /* from linux include/acpi/actype.h */ /* Default ACPI register widths */ diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h new file mode 100644 index 0000000000..f245f8d236 --- /dev/null +++ b/include/hw/acpi/acpi_dev_interface.h @@ -0,0 +1,43 @@ +#ifndef ACPI_DEV_INTERFACE_H +#define ACPI_DEV_INTERFACE_H + +#include "qom/object.h" +#include "qapi-types.h" + +#define TYPE_ACPI_DEVICE_IF "acpi-device-interface" + +#define ACPI_DEVICE_IF_CLASS(klass) \ + OBJECT_CLASS_CHECK(AcpiDeviceIfClass, (klass), \ + TYPE_ACPI_DEVICE_IF) +#define ACPI_DEVICE_IF_GET_CLASS(obj) \ + OBJECT_GET_CLASS(AcpiDeviceIfClass, (obj), \ + TYPE_ACPI_DEVICE_IF) +#define ACPI_DEVICE_IF(obj) \ + INTERFACE_CHECK(AcpiDeviceIf, (obj), \ + TYPE_ACPI_DEVICE_IF) + + +typedef struct AcpiDeviceIf { + /* <private> */ + Object Parent; +} AcpiDeviceIf; + +/** + * AcpiDeviceIfClass: + * + * ospm_status: returns status of ACPI device objects, reported + * via _OST method if device supports it. + * + * Interface is designed for providing unified interface + * to generic ACPI functionality that could be used without + * knowledge about internals of actual device that implements + * ACPI interface. + */ +typedef struct AcpiDeviceIfClass { + /* <private> */ + InterfaceClass parent_class; + + /* <public> */ + void (*ospm_status)(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); +} AcpiDeviceIfClass; +#endif diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h index 4576400fd7..9e5d30c9df 100644 --- a/include/hw/acpi/cpu_hotplug.h +++ b/include/hw/acpi/cpu_hotplug.h @@ -13,7 +13,7 @@ #define ACPI_HOTPLUG_H #include "hw/acpi/acpi.h" -#include "hw/acpi/cpu_hotplug_defs.h" +#include "hw/acpi/pc-hotplug.h" typedef struct AcpiCpuHotplug { MemoryRegion io; diff --git a/include/hw/acpi/cpu_hotplug_defs.h b/include/hw/acpi/cpu_hotplug_defs.h deleted file mode 100644 index 9f33663511..0000000000 --- a/include/hw/acpi/cpu_hotplug_defs.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * QEMU ACPI hotplug utilities shared defines - * - * Copyright (C) 2013 Red Hat Inc - * - * Authors: - * Igor Mammedov <imammedo@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ -#ifndef ACPI_HOTPLUG_DEFS_H -#define ACPI_HOTPLUG_DEFS_H - -/* - * ONLY DEFINEs are permited in this file since it's shared - * between C and ASL code. - */ -#define ACPI_CPU_HOTPLUG_STATUS 4 - -/* Limit for CPU arch IDs for CPU hotplug. All hotpluggable CPUs should - * have CPUClass.get_arch_id() < ACPI_CPU_HOTPLUG_ID_LIMIT. - */ -#define ACPI_CPU_HOTPLUG_ID_LIMIT 256 - -/* 256 CPU IDs, 8 bits per entry: */ -#define ACPI_GPE_PROC_LEN 32 - -#define ICH9_CPU_HOTPLUG_IO_BASE 0x0CD8 -#define PIIX4_CPU_HOTPLUG_IO_BASE 0xaf00 - -#endif diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index 104f419852..7e42448ef9 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -23,6 +23,8 @@ #include "hw/acpi/acpi.h" #include "hw/acpi/cpu_hotplug.h" +#include "hw/acpi/memory_hotplug.h" +#include "hw/acpi/acpi_dev_interface.h" typedef struct ICH9LPCPMRegs { /* @@ -46,6 +48,8 @@ typedef struct ICH9LPCPMRegs { AcpiCpuHotplug gpe_cpu; Notifier cpu_added_notifier; + + MemHotplugState acpi_memory_hotplug; } ICH9LPCPMRegs; void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, @@ -55,4 +59,7 @@ extern const VMStateDescription vmstate_ich9_pm; void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp); +void ich9_pm_device_plug_cb(ICH9LPCPMRegs *pm, DeviceState *dev, Error **errp); + +void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); #endif /* HW_ACPI_ICH9_H */ diff --git a/include/hw/acpi/memory_hotplug.h b/include/hw/acpi/memory_hotplug.h new file mode 100644 index 0000000000..7bbf8a0064 --- /dev/null +++ b/include/hw/acpi/memory_hotplug.h @@ -0,0 +1,38 @@ +#ifndef QEMU_HW_ACPI_MEMORY_HOTPLUG_H +#define QEMU_HW_ACPI_MEMORY_HOTPLUG_H + +#include "hw/qdev-core.h" +#include "hw/acpi/acpi.h" +#include "migration/vmstate.h" + +#define ACPI_MEMORY_HOTPLUG_STATUS 8 + +typedef struct MemStatus { + DeviceState *dimm; + bool is_enabled; + bool is_inserting; + uint32_t ost_event; + uint32_t ost_status; +} MemStatus; + +typedef struct MemHotplugState { + bool is_enabled; /* true if memory hotplug is supported */ + MemoryRegion io; + uint32_t selector; + uint32_t dev_count; + MemStatus *devs; +} MemHotplugState; + +void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner, + MemHotplugState *state); + +void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st, + DeviceState *dev, Error **errp); + +extern const VMStateDescription vmstate_memory_hotplug; +#define VMSTATE_MEMORY_HOTPLUG(memhp, state) \ + VMSTATE_STRUCT(memhp, state, 1, \ + vmstate_memory_hotplug, MemHotplugState) + +void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list); +#endif diff --git a/include/hw/acpi/pc-hotplug.h b/include/hw/acpi/pc-hotplug.h new file mode 100644 index 0000000000..bf5157d7c3 --- /dev/null +++ b/include/hw/acpi/pc-hotplug.h @@ -0,0 +1,56 @@ +/* + * QEMU ACPI hotplug utilities shared defines + * + * Copyright (C) 2014 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef PC_HOTPLUG_H +#define PC_HOTPLUG_H + +/* + * ONLY DEFINEs are permited in this file since it's shared + * between C and ASL code. + */ +#define ACPI_CPU_HOTPLUG_STATUS 4 + +/* Limit for CPU arch IDs for CPU hotplug. All hotpluggable CPUs should + * have CPUClass.get_arch_id() < ACPI_CPU_HOTPLUG_ID_LIMIT. + */ +#define ACPI_CPU_HOTPLUG_ID_LIMIT 256 + +/* 256 CPU IDs, 8 bits per entry: */ +#define ACPI_GPE_PROC_LEN 32 + +#define ICH9_CPU_HOTPLUG_IO_BASE 0x0CD8 +#define PIIX4_CPU_HOTPLUG_IO_BASE 0xaf00 + +#define ACPI_MEMORY_HOTPLUG_IO_LEN 24 +#define ACPI_MEMORY_HOTPLUG_BASE 0x0a00 + +#define MEMORY_HOPTLUG_DEVICE MHPD +#define MEMORY_SLOTS_NUMBER MDNR +#define MEMORY_HOTPLUG_IO_REGION HPMR +#define MEMORY_SLOT_ADDR_LOW MRBL +#define MEMORY_SLOT_ADDR_HIGH MRBH +#define MEMORY_SLOT_SIZE_LOW MRLL +#define MEMORY_SLOT_SIZE_HIGH MRLH +#define MEMORY_SLOT_PROXIMITY MPX +#define MEMORY_SLOT_ENABLED MES +#define MEMORY_SLOT_INSERT_EVENT MINS +#define MEMORY_SLOT_SLECTOR MSEL +#define MEMORY_SLOT_OST_EVENT MOEV +#define MEMORY_SLOT_OST_STATUS MOSC +#define MEMORY_SLOT_LOCK MLCK +#define MEMORY_SLOT_STATUS_METHOD MRST +#define MEMORY_SLOT_CRS_METHOD MCRS +#define MEMORY_SLOT_OST_METHOD MOST +#define MEMORY_SLOT_PROXIMITY_METHOD MPXM +#define MEMORY_SLOT_NOTIFY_METHOD MTFY +#define MEMORY_SLOT_SCAN_METHOD MSCN + +#endif diff --git a/include/hw/boards.h b/include/hw/boards.h index 2d2e2bef19..605a970934 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -43,9 +43,13 @@ struct QEMUMachine { const char *hw_version; }; -#define TYPE_MACHINE_SUFFIX "-machine" +void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, + const char *name, + uint64_t ram_size); + int qemu_register_machine(QEMUMachine *m); +#define TYPE_MACHINE_SUFFIX "-machine" #define TYPE_MACHINE "machine" #undef MACHINE /* BSD defines it and QEMU does not use it */ #define MACHINE(obj) \ @@ -61,6 +65,11 @@ extern MachineState *current_machine; /** * MachineClass: * @qemu_machine: #QEMUMachine + * @get_hotplug_handler: this function is called during bus-less + * device hotplug. If defined it returns pointer to an instance + * of HotplugHandler object, which handles hotplug operation + * for a given @dev. It may return NULL if @dev doesn't require + * any actions to be performed by hotplug handler. */ struct MachineClass { /*< private >*/ @@ -90,6 +99,9 @@ struct MachineClass { const char *default_boot_order; GlobalProperty *compat_props; const char *hw_version; + + HotplugHandler *(*get_hotplug_handler)(MachineState *machine, + DeviceState *dev); }; /** @@ -113,6 +125,8 @@ struct MachineState { char *firmware; ram_addr_t ram_size; + ram_addr_t maxram_size; + uint64_t ram_slots; const char *boot_order; char *kernel_filename; char *kernel_cmdline; diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h index e19143555e..59ea25b49a 100644 --- a/include/hw/i386/ich9.h +++ b/include/hw/i386/ich9.h @@ -24,7 +24,7 @@ I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base); #define ICH9_CC_SIZE (16 * 1024) /* 16KB */ -#define TYPE_ICH9_LPC_DEVICE "ICH9 LPC" +#define TYPE_ICH9_LPC_DEVICE "ICH9-LPC" #define ICH9_LPC_DEVICE(obj) \ OBJECT_CHECK(ICH9LPCState, (obj), TYPE_ICH9_LPC_DEVICE) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index ca7a0bdd1a..19f78ea336 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -3,6 +3,7 @@ #include "qemu-common.h" #include "exec/memory.h" +#include "hw/boards.h" #include "hw/isa/isa.h" #include "hw/block/fdc.h" #include "net/net.h" @@ -12,9 +13,57 @@ #include "qemu/bitmap.h" #include "sysemu/sysemu.h" #include "hw/pci/pci.h" +#include "hw/boards.h" #define HPET_INTCAP "hpet-intcap" +/** + * PCMachineState: + * @hotplug_memory_base: address in guest RAM address space where hotplug memory + * address space begins. + * @hotplug_memory: hotplug memory addess space container + * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling + */ +struct PCMachineState { + /*< private >*/ + MachineState parent_obj; + + /* <public> */ + ram_addr_t hotplug_memory_base; + MemoryRegion hotplug_memory; + + HotplugHandler *acpi_dev; +}; + +#define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" +#define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size" + +/** + * PCMachineClass: + * @get_hotplug_handler: pointer to parent class callback @get_hotplug_handler + */ +struct PCMachineClass { + /*< private >*/ + MachineClass parent_class; + + /*< public >*/ + HotplugHandler *(*get_hotplug_handler)(MachineState *machine, + DeviceState *dev); +}; + +typedef struct PCMachineState PCMachineState; +typedef struct PCMachineClass PCMachineClass; + +#define TYPE_PC_MACHINE "generic-pc-machine" +#define PC_MACHINE(obj) \ + OBJECT_CHECK(PCMachineState, (obj), TYPE_PC_MACHINE) +#define PC_MACHINE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(PCMachineClass, (obj), TYPE_PC_MACHINE) +#define PC_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(PCMachineClass, (klass), TYPE_PC_MACHINE) + +void qemu_register_pc_machine(QEMUMachine *m); + /* PC-style peripherals (also used by other machines). */ typedef struct PcPciInfo { @@ -43,6 +92,7 @@ struct PcGuestInfo { uint64_t *node_cpu; FWCfgState *fw_cfg; bool has_acpi_build; + bool has_reserved_memory; }; /* parallel.c */ @@ -134,10 +184,8 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, MemoryRegion *pci_address_space); -FWCfgState *pc_memory_init(MemoryRegion *system_memory, - const char *kernel_filename, - const char *kernel_cmdline, - const char *initrd_filename, +FWCfgState *pc_memory_init(MachineState *machine, + MemoryRegion *system_memory, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, MemoryRegion *rom_memory, @@ -167,7 +215,8 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name); I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qemu_irq sci_irq, qemu_irq smi_irq, - int kvm_enabled, FWCfgState *fw_cfg); + int kvm_enabled, FWCfgState *fw_cfg, + DeviceState **piix4_pm); void piix4_smbus_register_device(SMBusDevice *dev, uint8_t addr); /* hpet.c */ @@ -243,7 +292,12 @@ int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_Q35_COMPAT_2_0 \ - PC_COMPAT_2_0 + PC_COMPAT_2_0, \ + {\ + .driver = "ICH9-LPC",\ + .property = "memory-hotplug-support",\ + .value = "off",\ + } #define PC_Q35_COMPAT_1_7 \ PC_COMPAT_1_7, \ @@ -272,10 +326,16 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .property = "any_layout",\ .value = "off",\ },{\ + .driver = "PIIX4_PM",\ + .property = "memory-hotplug-support",\ + .value = "off",\ + },\ + {\ .driver = "apic",\ .property = "version",\ .value = stringify(0x11),\ - },{\ + },\ + {\ .driver = "nec-usb-xhci",\ .property = "superspeed-ports-first",\ .value = "off",\ @@ -294,6 +354,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .driver = "pci-serial-4x",\ .property = "prog_if",\ .value = stringify(0),\ + },\ + {\ + .driver = "virtio-net-pci",\ + .property = "guest_announce",\ + .value = "off",\ } #define PC_COMPAT_1_7 \ diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h new file mode 100644 index 0000000000..761eeef801 --- /dev/null +++ b/include/hw/mem/pc-dimm.h @@ -0,0 +1,81 @@ +/* + * PC DIMM device + * + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Vasilis Liaskovitis <vasilis.liaskovitis@profitbricks.com> + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_PC_DIMM_H +#define QEMU_PC_DIMM_H + +#include "exec/memory.h" +#include "sysemu/hostmem.h" +#include "hw/qdev.h" + +#define DEFAULT_PC_DIMMSIZE (1024*1024*1024) + +#define TYPE_PC_DIMM "pc-dimm" +#define PC_DIMM(obj) \ + OBJECT_CHECK(PCDIMMDevice, (obj), TYPE_PC_DIMM) +#define PC_DIMM_CLASS(oc) \ + OBJECT_CLASS_CHECK(PCDIMMDeviceClass, (oc), TYPE_PC_DIMM) +#define PC_DIMM_GET_CLASS(obj) \ + OBJECT_GET_CLASS(PCDIMMDeviceClass, (obj), TYPE_PC_DIMM) + +#define PC_DIMM_ADDR_PROP "addr" +#define PC_DIMM_SLOT_PROP "slot" +#define PC_DIMM_NODE_PROP "node" +#define PC_DIMM_SIZE_PROP "size" +#define PC_DIMM_MEMDEV_PROP "memdev" + +#define PC_DIMM_UNASSIGNED_SLOT -1 + +/** + * PCDIMMDevice: + * @addr: starting guest physical address, where @PCDIMMDevice is mapped. + * Default value: 0, means that address is auto-allocated. + * @node: numa node to which @PCDIMMDevice is attached. + * @slot: slot number into which @PCDIMMDevice is plugged in. + * Default value: -1, means that slot is auto-allocated. + * @hostmem: host memory backend providing memory for @PCDIMMDevice + */ +typedef struct PCDIMMDevice { + /* private */ + DeviceState parent_obj; + + /* public */ + uint64_t addr; + uint32_t node; + int32_t slot; + HostMemoryBackend *hostmem; +} PCDIMMDevice; + +/** + * PCDIMMDeviceClass: + * @get_memory_region: returns #MemoryRegion associated with @dimm + */ +typedef struct PCDIMMDeviceClass { + /* private */ + DeviceClass parent_class; + + /* public */ + MemoryRegion *(*get_memory_region)(PCDIMMDevice *dimm); +} PCDIMMDeviceClass; + +uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, + uint64_t address_space_size, + uint64_t *hint, uint64_t size, + Error **errp); + +int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); + +int qmp_pc_dimm_device_list(Object *obj, void *opaque); +#endif diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h new file mode 100644 index 0000000000..d31768a1d4 --- /dev/null +++ b/include/hw/virtio/vhost-backend.h @@ -0,0 +1,38 @@ +/* + * vhost-backend + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef VHOST_BACKEND_H_ +#define VHOST_BACKEND_H_ + +typedef enum VhostBackendType { + VHOST_BACKEND_TYPE_NONE = 0, + VHOST_BACKEND_TYPE_KERNEL = 1, + VHOST_BACKEND_TYPE_USER = 2, + VHOST_BACKEND_TYPE_MAX = 3, +} VhostBackendType; + +struct vhost_dev; + +typedef int (*vhost_call)(struct vhost_dev *dev, unsigned long int request, + void *arg); +typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque); +typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev); + +typedef struct VhostOps { + VhostBackendType backend_type; + vhost_call vhost_call; + vhost_backend_init vhost_backend_init; + vhost_backend_cleanup vhost_backend_cleanup; +} VhostOps; + +int vhost_set_backend_type(struct vhost_dev *dev, + VhostBackendType backend_type); + +#endif /* VHOST_BACKEND_H_ */ diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index de24746c7e..33028ec8c2 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -2,6 +2,7 @@ #define VHOST_H #include "hw/hw.h" +#include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio.h" #include "exec/memory.h" @@ -25,11 +26,11 @@ typedef unsigned long vhost_log_chunk_t; #define VHOST_LOG_PAGE 0x1000 #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) +#define VHOST_INVALID_FEATURE_BIT (0xff) struct vhost_memory; struct vhost_dev { MemoryListener memory_listener; - int control; struct vhost_memory *mem; int n_mem_sections; MemoryRegionSection *mem_sections; @@ -48,10 +49,12 @@ struct vhost_dev { bool memory_changed; hwaddr mem_changed_start_addr; hwaddr mem_changed_end_addr; + const VhostOps *vhost_ops; + void *opaque; }; -int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath, - bool force); +int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + VhostBackendType backend_type, bool force); void vhost_dev_cleanup(struct vhost_dev *hdev); bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev); int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); @@ -68,4 +71,8 @@ bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n); */ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, bool mask); +unsigned vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, + unsigned features); +void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, + unsigned features); #endif diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 4b32440837..f7fccc08a4 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -49,12 +49,14 @@ #define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */ #define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */ #define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ +#define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce itself */ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ +#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ #define TX_TIMER_INTERVAL 150000 /* 150 us */ @@ -193,6 +195,8 @@ typedef struct VirtIONet { char *netclient_name; char *netclient_type; uint64_t curr_guest_offloads; + QEMUTimer *announce_timer; + int announce_counter; } VirtIONet; #define VIRTIO_NET_CTRL_MAC 1 @@ -213,6 +217,18 @@ typedef struct VirtIONet { #define VIRTIO_NET_CTRL_VLAN_DEL 1 /* + * Control link announce acknowledgement + * + * VIRTIO_NET_S_ANNOUNCE bit in the status field requests link announcement from + * guest driver. The driver is notified by config space change interrupt. The + * command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that the driver has + * received the notification. It makes the device clear the bit + * VIRTIO_NET_S_ANNOUNCE in the status field. + */ +#define VIRTIO_NET_CTRL_ANNOUNCE 3 + #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 + +/* * Control Multiqueue * * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET @@ -251,6 +267,7 @@ struct virtio_net_ctrl_mq { DEFINE_PROP_BIT("guest_tso6", _state, _field, VIRTIO_NET_F_GUEST_TSO6, true), \ DEFINE_PROP_BIT("guest_ecn", _state, _field, VIRTIO_NET_F_GUEST_ECN, true), \ DEFINE_PROP_BIT("guest_ufo", _state, _field, VIRTIO_NET_F_GUEST_UFO, true), \ + DEFINE_PROP_BIT("guest_announce", _state, _field, VIRTIO_NET_F_GUEST_ANNOUNCE, true), \ DEFINE_PROP_BIT("host_tso4", _state, _field, VIRTIO_NET_F_HOST_TSO4, true), \ DEFINE_PROP_BIT("host_tso6", _state, _field, VIRTIO_NET_F_HOST_TSO6, true), \ DEFINE_PROP_BIT("host_ecn", _state, _field, VIRTIO_NET_F_HOST_ECN, true), \ diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 7e45048355..799d2d0f03 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -752,6 +752,8 @@ extern const VMStateInfo vmstate_info_bitmap; #define VMSTATE_END_OF_LIST() \ {} +#define SELF_ANNOUNCE_ROUNDS 5 + int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, int version_id); void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, @@ -778,4 +780,12 @@ void vmstate_register_ram(struct MemoryRegion *memory, DeviceState *dev); void vmstate_unregister_ram(struct MemoryRegion *memory, DeviceState *dev); void vmstate_register_ram_global(struct MemoryRegion *memory); +static inline +int64_t self_announce_delay(int round) +{ + assert(round < SELF_ANNOUNCE_ROUNDS && round > 0); + /* delay 50ms, 150ms, 250ms, ... */ + return 50 + (SELF_ANNOUNCE_ROUNDS - round - 1) * 100; +} + #endif diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h index 1c1f56f36b..97696ea693 100644 --- a/include/monitor/monitor.h +++ b/include/monitor/monitor.h @@ -51,6 +51,7 @@ typedef enum MonitorEvent { QEVENT_BLOCK_IMAGE_CORRUPTED, QEVENT_QUORUM_FAILURE, QEVENT_QUORUM_REPORT_BAD, + QEVENT_ACPI_OST, /* Add to 'monitor_event_names' array in monitor.c when * defining new events here */ diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h new file mode 100644 index 0000000000..85109f63aa --- /dev/null +++ b/include/net/vhost-user.h @@ -0,0 +1,17 @@ +/* + * vhost-user.h + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef VHOST_USER_H_ +#define VHOST_USER_H_ + +struct vhost_net; +struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc); + +#endif /* VHOST_USER_H_ */ diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h index 2d936bb5f5..b1c18a3f3b 100644 --- a/include/net/vhost_net.h +++ b/include/net/vhost_net.h @@ -2,11 +2,19 @@ #define VHOST_NET_H #include "net/net.h" +#include "hw/virtio/vhost-backend.h" struct vhost_net; typedef struct vhost_net VHostNetState; -VHostNetState *vhost_net_init(NetClientState *backend, int devfd, bool force); +typedef struct VhostNetOptions { + VhostBackendType backend_type; + NetClientState *net_backend; + void *opaque; + bool force; +} VhostNetOptions; + +struct vhost_net *vhost_net_init(VhostNetOptions *options); bool vhost_net_query(VHostNetState *net, VirtIODevice *dev); int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, int total_queues); @@ -20,4 +28,5 @@ void vhost_net_ack_features(VHostNetState *net, unsigned features); bool vhost_net_virtqueue_pending(VHostNetState *net, int n); void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, int idx, bool mask); +VHostNetState *get_vhost_net(NetClientState *nc); #endif diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index ffb296692d..6d35c1bcba 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -116,6 +116,16 @@ void qemu_anon_ram_free(void *ptr, size_t size); #else #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID #endif +#ifdef MADV_UNMERGEABLE +#define QEMU_MADV_UNMERGEABLE MADV_UNMERGEABLE +#else +#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID +#endif +#ifdef MADV_DODUMP +#define QEMU_MADV_DODUMP MADV_DODUMP +#else +#define QEMU_MADV_DODUMP QEMU_MADV_INVALID +#endif #ifdef MADV_DONTDUMP #define QEMU_MADV_DONTDUMP MADV_DONTDUMP #else @@ -133,6 +143,8 @@ void qemu_anon_ram_free(void *ptr, size_t size); #define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED #define QEMU_MADV_DONTFORK QEMU_MADV_INVALID #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_DODUMP QEMU_MADV_INVALID #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID #define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID @@ -142,6 +154,8 @@ void qemu_anon_ram_free(void *ptr, size_t size); #define QEMU_MADV_DONTNEED QEMU_MADV_INVALID #define QEMU_MADV_DONTFORK QEMU_MADV_INVALID #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_DODUMP QEMU_MADV_INVALID #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID #define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID @@ -251,4 +265,6 @@ void qemu_init_auxval(char **envp); void qemu_set_tty_echo(int fd, bool echo); +void os_mem_prealloc(int fd, char *area, size_t sz); + #endif diff --git a/include/qemu/range.h b/include/qemu/range.h index aae9720161..cfa021fd48 100644 --- a/include/qemu/range.h +++ b/include/qemu/range.h @@ -3,6 +3,7 @@ #include <inttypes.h> #include <qemu/typedefs.h> +#include "qemu/queue.h" /* * Operations on 64 bit address ranges. @@ -60,4 +61,75 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1, return !(last2 < first1 || last1 < first2); } +/* 0,1 can merge with 1,2 but don't overlap */ +static inline bool ranges_can_merge(Range *range1, Range *range2) +{ + return !(range1->end < range2->begin || range2->end < range1->begin); +} + +static inline int range_merge(Range *range1, Range *range2) +{ + if (ranges_can_merge(range1, range2)) { + if (range1->end < range2->end) { + range1->end = range2->end; + } + if (range1->begin > range2->begin) { + range1->begin = range2->begin; + } + return 0; + } + + return -1; +} + +static inline GList *g_list_insert_sorted_merged(GList *list, + gpointer data, + GCompareFunc func) +{ + GList *l, *next = NULL; + Range *r, *nextr; + + if (!list) { + list = g_list_insert_sorted(list, data, func); + return list; + } + + nextr = data; + l = list; + while (l && l != next && nextr) { + r = l->data; + if (ranges_can_merge(r, nextr)) { + range_merge(r, nextr); + l = g_list_remove_link(l, next); + next = g_list_next(l); + if (next) { + nextr = next->data; + } else { + nextr = NULL; + } + } else { + l = g_list_next(l); + } + } + + if (!l) { + list = g_list_insert_sorted(list, data, func); + } + + return list; +} + +static inline gint range_compare(gconstpointer a, gconstpointer b) +{ + Range *ra = (Range *)a, *rb = (Range *)b; + if (ra->begin == rb->begin && ra->end == rb->end) { + return 0; + } else if (range_get_last(ra->begin, ra->end) < + range_get_last(rb->begin, rb->end)) { + return -1; + } else { + return 1; + } +} + #endif diff --git a/include/qom/object.h b/include/qom/object.h index a641dcde10..b882ccc85f 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -917,6 +917,34 @@ int64_t object_property_get_int(Object *obj, const char *name, Error **errp); /** + * object_property_get_enum: + * @obj: the object + * @name: the name of the property + * @strings: strings corresponding to enums + * @errp: returns an error if this function fails + * + * Returns: the value of the property, converted to an integer, or + * undefined if an error occurs (including when the property value is not + * an enum). + */ +int object_property_get_enum(Object *obj, const char *name, + const char *strings[], Error **errp); + +/** + * object_property_get_uint16List: + * @obj: the object + * @name: the name of the property + * @list: the returned int list + * @errp: returns an error if this function fails + * + * Returns: the value of the property, converted to integers, or + * undefined if an error occurs (including when the property value is not + * an list of integers). + */ +void object_property_get_uint16List(Object *obj, const char *name, + uint16List **list, Error **errp); + +/** * object_property_set: * @obj: the object * @v: the visitor that will be used to write the property value. This should diff --git a/include/sysemu/char.h b/include/sysemu/char.h index 7f5eeb38b0..3b835f6fb3 100644 --- a/include/sysemu/char.h +++ b/include/sysemu/char.h @@ -56,10 +56,13 @@ typedef void IOEventHandler(void *opaque, int event); struct CharDriverState { void (*init)(struct CharDriverState *s); int (*chr_write)(struct CharDriverState *s, const uint8_t *buf, int len); + int (*chr_sync_read)(struct CharDriverState *s, + const uint8_t *buf, int len); GSource *(*chr_add_watch)(struct CharDriverState *s, GIOCondition cond); void (*chr_update_read_handler)(struct CharDriverState *s); int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg); - int (*get_msgfd)(struct CharDriverState *s); + int (*get_msgfds)(struct CharDriverState *s, int* fds, int num); + int (*set_msgfds)(struct CharDriverState *s, int *fds, int num); int (*chr_add_client)(struct CharDriverState *chr, int fd); IOEventHandler *chr_event; IOCanReadHandler *chr_can_read; @@ -80,6 +83,7 @@ struct CharDriverState { int avail_connections; int is_mux; guint fd_in_tag; + guint fd_hup_tag; QemuOpts *opts; QTAILQ_ENTRY(CharDriverState) next; }; @@ -189,6 +193,18 @@ int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len); int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len); /** + * @qemu_chr_fe_read_all: + * + * Read data to a buffer from the back end. + * + * @buf the data buffer + * @len the number of bytes to read + * + * Returns: the number of bytes read + */ +int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len); + +/** * @qemu_chr_fe_ioctl: * * Issue a device specific ioctl to a backend. @@ -215,6 +231,32 @@ int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg); int qemu_chr_fe_get_msgfd(CharDriverState *s); /** + * @qemu_chr_fe_get_msgfds: + * + * For backends capable of fd passing, return the number of file received + * descriptors and fills the fds array up to num elements + * + * Returns: -1 if fd passing isn't supported or there are no pending file + * descriptors. If file descriptors are returned, subsequent calls to + * this function will return -1 until a client sends a new set of file + * descriptors. + */ +int qemu_chr_fe_get_msgfds(CharDriverState *s, int *fds, int num); + +/** + * @qemu_chr_fe_set_msgfds: + * + * For backends capable of fd passing, set an array of fds to be passed with + * the next send operation. + * A subsequent call to this function before calling a write function will + * result in overwriting the fd array with the new value without being send. + * Upon writing the message the fd array is freed. + * + * Returns: -1 if fd passing isn't supported. + */ +int qemu_chr_fe_set_msgfds(CharDriverState *s, int *fds, int num); + +/** * @qemu_chr_fe_claim: * * Claim a backend before using it, should be called before calling diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 6502488a05..4f790810bf 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -23,7 +23,6 @@ extern int smp_threads; #define smp_threads 1 #endif -void set_numa_modes(void); void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg); #endif diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h new file mode 100644 index 0000000000..1ce439415d --- /dev/null +++ b/include/sysemu/hostmem.h @@ -0,0 +1,68 @@ +/* + * QEMU Host Memory Backend + * + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_RAM_H +#define QEMU_RAM_H + +#include "sysemu/sysemu.h" /* for MAX_NODES */ +#include "qom/object.h" +#include "qapi/error.h" +#include "exec/memory.h" +#include "qemu/option.h" +#include "qemu/bitmap.h" + +#define TYPE_MEMORY_BACKEND "memory-backend" +#define MEMORY_BACKEND(obj) \ + OBJECT_CHECK(HostMemoryBackend, (obj), TYPE_MEMORY_BACKEND) +#define MEMORY_BACKEND_GET_CLASS(obj) \ + OBJECT_GET_CLASS(HostMemoryBackendClass, (obj), TYPE_MEMORY_BACKEND) +#define MEMORY_BACKEND_CLASS(klass) \ + OBJECT_CLASS_CHECK(HostMemoryBackendClass, (klass), TYPE_MEMORY_BACKEND) + +typedef struct HostMemoryBackend HostMemoryBackend; +typedef struct HostMemoryBackendClass HostMemoryBackendClass; + +/** + * HostMemoryBackendClass: + * @parent_class: opaque parent class container + */ +struct HostMemoryBackendClass { + ObjectClass parent_class; + + void (*alloc)(HostMemoryBackend *backend, Error **errp); +}; + +/** + * @HostMemoryBackend + * + * @parent: opaque parent object container + * @size: amount of memory backend provides + * @id: unique identification string in memdev namespace + * @mr: MemoryRegion representing host memory belonging to backend + */ +struct HostMemoryBackend { + /* private */ + Object parent; + + /* protected */ + uint64_t size; + bool merge, dump; + bool prealloc, force_prealloc; + DECLARE_BITMAP(host_nodes, MAX_NODES + 1); + HostMemPolicy policy; + + MemoryRegion mr; +}; + +MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend, + Error **errp); + +#endif diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index e79e92c50e..c4556ad59e 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -43,6 +43,7 @@ extern bool kvm_allowed; extern bool kvm_kernel_irqchip; extern bool kvm_async_interrupts_allowed; extern bool kvm_halt_in_kernel_allowed; +extern bool kvm_eventfds_allowed; extern bool kvm_irqfds_allowed; extern bool kvm_msi_via_irqfd_allowed; extern bool kvm_gsi_routing_allowed; @@ -83,6 +84,15 @@ extern bool kvm_readonly_mem_allowed; #define kvm_halt_in_kernel() (kvm_halt_in_kernel_allowed) /** + * kvm_eventfds_enabled: + * + * Returns: true if we can use eventfds to receive notifications + * from a KVM CPU (ie the kernel supports eventds and we are running + * with a configuration where it is meaningful to use them). + */ +#define kvm_eventfds_enabled() (kvm_eventfds_allowed) + +/** * kvm_irqfds_enabled: * * Returns: true if we can use irqfds to inject interrupts into @@ -128,6 +138,7 @@ extern bool kvm_readonly_mem_allowed; #define kvm_irqchip_in_kernel() (false) #define kvm_async_interrupts_enabled() (false) #define kvm_halt_in_kernel() (false) +#define kvm_eventfds_enabled() (false) #define kvm_irqfds_enabled() (false) #define kvm_msi_via_irqfd_enabled() (false) #define kvm_gsi_routing_allowed() (false) diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index bf8523ada1..af3fbc47d8 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -89,6 +89,8 @@ static inline void os_setup_post(void) {} void os_set_line_buffering(void); static inline void os_set_proc_name(const char *dummy) {} +size_t getpagesize(void); + #if !defined(EPROTONOSUPPORT) # define EPROTONOSUPPORT EINVAL #endif diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index ba5c7f8093..277230db49 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -9,6 +9,8 @@ #include "qapi-types.h" #include "qemu/notify.h" #include "qemu/main-loop.h" +#include "qemu/bitmap.h" +#include "qom/object.h" /* vl.c */ @@ -131,8 +133,10 @@ extern uint8_t *boot_splash_filedata; extern size_t boot_splash_filedata_size; extern uint8_t qemu_extra_params_fw[2]; extern QEMUClockType rtc_clock; +extern const char *mem_path; +extern int mem_prealloc; -#define MAX_NODES 64 +#define MAX_NODES 128 /* The following shall be true for all CPUs: * cpu->cpu_index < max_cpus <= MAX_CPUMASK_BITS @@ -142,8 +146,16 @@ extern QEMUClockType rtc_clock; #define MAX_CPUMASK_BITS 255 extern int nb_numa_nodes; -extern uint64_t node_mem[MAX_NODES]; -extern unsigned long *node_cpumask[MAX_NODES]; +typedef struct node_info { + uint64_t node_mem; + DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); + struct HostMemoryBackend *node_memdev; +} NodeInfo; +extern NodeInfo numa_info[MAX_NODES]; +void set_numa_nodes(void); +void set_numa_modes(void); +extern QemuOptsList qemu_numa_opts; +int numa_init_func(QemuOpts *opts, void *opaque); #define MAX_OPTION_ROMS 16 typedef struct QEMUOptionRom { @@ -113,6 +113,7 @@ KVMState *kvm_state; bool kvm_kernel_irqchip; bool kvm_async_interrupts_allowed; bool kvm_halt_in_kernel_allowed; +bool kvm_eventfds_allowed; bool kvm_irqfds_allowed; bool kvm_msi_via_irqfd_allowed; bool kvm_gsi_routing_allowed; @@ -1541,6 +1542,9 @@ int kvm_init(MachineClass *mc) (kvm_check_extension(s, KVM_CAP_READONLY_MEM) > 0); #endif + kvm_eventfds_allowed = + (kvm_check_extension(s, KVM_CAP_IOEVENTFD) > 0); + ret = kvm_arch_init(s); if (ret < 0) { goto err; diff --git a/kvm-stub.c b/kvm-stub.c index ac33d8666d..8e7737caa9 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -22,6 +22,7 @@ KVMState *kvm_state; bool kvm_kernel_irqchip; bool kvm_async_interrupts_allowed; +bool kvm_eventfds_allowed; bool kvm_irqfds_allowed; bool kvm_msi_via_irqfd_allowed; bool kvm_gsi_routing_allowed; @@ -23,6 +23,7 @@ #include "exec/memory-internal.h" #include "exec/ram_addr.h" +#include "sysemu/sysemu.h" //#define DEBUG_UNASSIGNED @@ -493,7 +494,7 @@ static AddressSpace *memory_region_to_address_space(MemoryRegion *mr) return as; } } - abort(); + return NULL; } /* Render a memory region into the global view. Ranges in @view obscure @@ -1032,6 +1033,23 @@ void memory_region_init_ram(MemoryRegion *mr, mr->ram_addr = qemu_ram_alloc(size, mr); } +#ifdef __linux__ +void memory_region_init_ram_from_file(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + bool share, + const char *path, + Error **errp) +{ + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + mr->ram_addr = qemu_ram_alloc_from_file(size, mr, share, path, errp); +} +#endif + void memory_region_init_ram_ptr(MemoryRegion *mr, Object *owner, const char *name, @@ -1254,6 +1272,17 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr, cpu_physical_memory_reset_dirty(mr->ram_addr + addr, size, client); } +int memory_region_get_fd(MemoryRegion *mr) +{ + if (mr->alias) { + return memory_region_get_fd(mr->alias); + } + + assert(mr->terminates); + + return qemu_get_ram_fd(mr->ram_addr & TARGET_PAGE_MASK); +} + void *memory_region_get_ram_ptr(MemoryRegion *mr) { if (mr->alias) { @@ -1593,6 +1622,11 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr) return true; } +bool memory_region_is_mapped(MemoryRegion *mr) +{ + return mr->container ? true : false; +} + MemoryRegionSection memory_region_find(MemoryRegion *mr, hwaddr addr, uint64_t size) { @@ -1610,6 +1644,9 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr, } as = memory_region_to_address_space(root); + if (!as) { + return ret; + } range = addrrange_make(int128_make64(addr), int128_make64(size)); view = address_space_get_flatview(as); @@ -487,6 +487,7 @@ static const char *monitor_event_names[] = { [QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED", [QEVENT_QUORUM_FAILURE] = "QUORUM_FAILURE", [QEVENT_QUORUM_REPORT_BAD] = "QUORUM_REPORT_BAD", + [QEVENT_ACPI_OST] = "ACPI_DEVICE_OST", }; QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX) @@ -2011,7 +2012,7 @@ static void do_info_numa(Monitor *mon, const QDict *qdict) } monitor_printf(mon, "\n"); monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i, - node_mem[i] >> 20); + numa_info[i].node_mem >> 20); } } @@ -2964,6 +2965,13 @@ static mon_cmd_t info_cmds[] = { .mhandler.cmd = hmp_info_tpm, }, { + .name = "memdev", + .args_type = "", + .params = "", + .help = "show the memory device", + .mhandler.cmd = hmp_info_memdev, + }, + { .name = NULL, }, }; diff --git a/net/Makefile.objs b/net/Makefile.objs index c25fe6920c..301f6b6b51 100644 --- a/net/Makefile.objs +++ b/net/Makefile.objs @@ -2,7 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o common-obj-y += socket.o common-obj-y += dump.o common-obj-y += eth.o -common-obj-$(CONFIG_POSIX) += tap.o +common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o common-obj-$(CONFIG_LINUX) += tap-linux.o common-obj-$(CONFIG_WIN32) += tap-win32.o common-obj-$(CONFIG_BSD) += tap-bsd.o diff --git a/net/clients.h b/net/clients.h index 7322ff5f33..7f3d4ae9f3 100644 --- a/net/clients.h +++ b/net/clients.h @@ -57,4 +57,7 @@ int net_init_netmap(const NetClientOptions *opts, const char *name, NetClientState *peer); #endif +int net_init_vhost_user(const NetClientOptions *opts, const char *name, + NetClientState *peer); + #endif /* QEMU_NET_CLIENTS_H */ @@ -322,6 +322,7 @@ void net_hub_check_clients(void) case NET_CLIENT_OPTIONS_KIND_TAP: case NET_CLIENT_OPTIONS_KIND_SOCKET: case NET_CLIENT_OPTIONS_KIND_VDE: + case NET_CLIENT_OPTIONS_KIND_VHOST_USER: has_host_dev = 1; break; default: @@ -62,6 +62,7 @@ const char *host_net_devices[] = { #ifdef CONFIG_VDE "vde", #endif + "vhost-user", NULL, }; @@ -802,6 +803,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])( [NET_CLIENT_OPTIONS_KIND_BRIDGE] = net_init_bridge, #endif [NET_CLIENT_OPTIONS_KIND_HUBPORT] = net_init_hubport, +#ifdef CONFIG_VHOST_NET_USED + [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user, +#endif }; @@ -835,6 +839,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) case NET_CLIENT_OPTIONS_KIND_BRIDGE: #endif case NET_CLIENT_OPTIONS_KIND_HUBPORT: +#ifdef CONFIG_VHOST_NET_USED + case NET_CLIENT_OPTIONS_KIND_VHOST_USER: +#endif break; default: @@ -594,6 +594,7 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, int vnet_hdr, int fd) { TAPState *s; + int vhostfd; s = net_tap_fd_init(peer, model, name, fd, vnet_hdr); if (!s) { @@ -624,7 +625,11 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, if (tap->has_vhost ? tap->vhost : vhostfdname || (tap->has_vhostforce && tap->vhostforce)) { - int vhostfd; + VhostNetOptions options; + + options.backend_type = VHOST_BACKEND_TYPE_KERNEL; + options.net_backend = &s->nc; + options.force = tap->has_vhostforce && tap->vhostforce; if (tap->has_vhostfd || tap->has_vhostfds) { vhostfd = monitor_handle_fd_param(cur_mon, vhostfdname); @@ -632,11 +637,16 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, return -1; } } else { - vhostfd = -1; + vhostfd = open("/dev/vhost-net", O_RDWR); + if (vhostfd < 0) { + error_report("tap: open vhost char device failed: %s", + strerror(errno)); + return -1; + } } + options.opaque = (void *)(uintptr_t)vhostfd; - s->vhost_net = vhost_net_init(&s->nc, vhostfd, - tap->has_vhostforce && tap->vhostforce); + s->vhost_net = vhost_net_init(&options); if (!s->vhost_net) { error_report("vhost-net requested but could not be initialized"); return -1; diff --git a/net/vhost-user.c b/net/vhost-user.c new file mode 100644 index 0000000000..24e050c772 --- /dev/null +++ b/net/vhost-user.c @@ -0,0 +1,258 @@ +/* + * vhost-user.c + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "clients.h" +#include "net/vhost_net.h" +#include "net/vhost-user.h" +#include "sysemu/char.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" + +typedef struct VhostUserState { + NetClientState nc; + CharDriverState *chr; + bool vhostforce; + VHostNetState *vhost_net; +} VhostUserState; + +typedef struct VhostUserChardevProps { + bool is_socket; + bool is_unix; + bool is_server; +} VhostUserChardevProps; + +VHostNetState *vhost_user_get_vhost_net(NetClientState *nc) +{ + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + return s->vhost_net; +} + +static int vhost_user_running(VhostUserState *s) +{ + return (s->vhost_net) ? 1 : 0; +} + +static int vhost_user_start(VhostUserState *s) +{ + VhostNetOptions options; + + if (vhost_user_running(s)) { + return 0; + } + + options.backend_type = VHOST_BACKEND_TYPE_USER; + options.net_backend = &s->nc; + options.opaque = s->chr; + options.force = s->vhostforce; + + s->vhost_net = vhost_net_init(&options); + + return vhost_user_running(s) ? 0 : -1; +} + +static void vhost_user_stop(VhostUserState *s) +{ + if (vhost_user_running(s)) { + vhost_net_cleanup(s->vhost_net); + } + + s->vhost_net = 0; +} + +static void vhost_user_cleanup(NetClientState *nc) +{ + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); + + vhost_user_stop(s); + qemu_purge_queued_packets(nc); +} + +static bool vhost_user_has_vnet_hdr(NetClientState *nc) +{ + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + return true; +} + +static bool vhost_user_has_ufo(NetClientState *nc) +{ + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + return true; +} + +static NetClientInfo net_vhost_user_info = { + .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER, + .size = sizeof(VhostUserState), + .cleanup = vhost_user_cleanup, + .has_vnet_hdr = vhost_user_has_vnet_hdr, + .has_ufo = vhost_user_has_ufo, +}; + +static void net_vhost_link_down(VhostUserState *s, bool link_down) +{ + s->nc.link_down = link_down; + + if (s->nc.peer) { + s->nc.peer->link_down = link_down; + } + + if (s->nc.info->link_status_changed) { + s->nc.info->link_status_changed(&s->nc); + } + + if (s->nc.peer && s->nc.peer->info->link_status_changed) { + s->nc.peer->info->link_status_changed(s->nc.peer); + } +} + +static void net_vhost_user_event(void *opaque, int event) +{ + VhostUserState *s = opaque; + + switch (event) { + case CHR_EVENT_OPENED: + vhost_user_start(s); + net_vhost_link_down(s, false); + error_report("chardev \"%s\" went up\n", s->chr->label); + break; + case CHR_EVENT_CLOSED: + net_vhost_link_down(s, true); + vhost_user_stop(s); + error_report("chardev \"%s\" went down\n", s->chr->label); + break; + } +} + +static int net_vhost_user_init(NetClientState *peer, const char *device, + const char *name, CharDriverState *chr, + bool vhostforce) +{ + NetClientState *nc; + VhostUserState *s; + + nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); + + snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s", + chr->label); + + s = DO_UPCAST(VhostUserState, nc, nc); + + /* We don't provide a receive callback */ + s->nc.receive_disabled = 1; + s->chr = chr; + s->vhostforce = vhostforce; + + qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s); + + return 0; +} + +static int net_vhost_chardev_opts(const char *name, const char *value, + void *opaque) +{ + VhostUserChardevProps *props = opaque; + + if (strcmp(name, "backend") == 0 && strcmp(value, "socket") == 0) { + props->is_socket = true; + } else if (strcmp(name, "path") == 0) { + props->is_unix = true; + } else if (strcmp(name, "server") == 0) { + props->is_server = true; + } else { + error_report("vhost-user does not support a chardev" + " with the following option:\n %s = %s", + name, value); + return -1; + } + return 0; +} + +static CharDriverState *net_vhost_parse_chardev(const NetdevVhostUserOptions *opts) +{ + CharDriverState *chr = qemu_chr_find(opts->chardev); + VhostUserChardevProps props; + + if (chr == NULL) { + error_report("chardev \"%s\" not found", opts->chardev); + return NULL; + } + + /* inspect chardev opts */ + memset(&props, 0, sizeof(props)); + if (qemu_opt_foreach(chr->opts, net_vhost_chardev_opts, &props, true) != 0) { + return NULL; + } + + if (!props.is_socket || !props.is_unix) { + error_report("chardev \"%s\" is not a unix socket", + opts->chardev); + return NULL; + } + + qemu_chr_fe_claim_no_fail(chr); + + return chr; +} + +static int net_vhost_check_net(QemuOpts *opts, void *opaque) +{ + const char *name = opaque; + const char *driver, *netdev; + const char virtio_name[] = "virtio-net-"; + + driver = qemu_opt_get(opts, "driver"); + netdev = qemu_opt_get(opts, "netdev"); + + if (!driver || !netdev) { + return 0; + } + + if (strcmp(netdev, name) == 0 && + strncmp(driver, virtio_name, strlen(virtio_name)) != 0) { + error_report("vhost-user requires frontend driver virtio-net-*"); + return -1; + } + + return 0; +} + +int net_init_vhost_user(const NetClientOptions *opts, const char *name, + NetClientState *peer) +{ + const NetdevVhostUserOptions *vhost_user_opts; + CharDriverState *chr; + bool vhostforce; + + assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + vhost_user_opts = opts->vhost_user; + + chr = net_vhost_parse_chardev(vhost_user_opts); + if (!chr) { + error_report("No suitable chardev found"); + return -1; + } + + /* verify net frontend */ + if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net, + (char *)name, true) == -1) { + return -1; + } + + /* vhostforce for non-MSIX */ + if (vhost_user_opts->has_vhostforce) { + vhostforce = vhost_user_opts->vhostforce; + } else { + vhostforce = false; + } + + return net_vhost_user_init(peer, "vhost_user", name, chr, vhostforce); +} diff --git a/numa.c b/numa.c new file mode 100644 index 0000000000..e471afe04a --- /dev/null +++ b/numa.c @@ -0,0 +1,369 @@ +/* + * NUMA parameter parsing routines + * + * Copyright (c) 2014 Fujitsu Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "sysemu/sysemu.h" +#include "exec/cpu-common.h" +#include "qemu/bitmap.h" +#include "qom/cpu.h" +#include "qemu/error-report.h" +#include "include/exec/cpu-common.h" /* for RAM_ADDR_FMT */ +#include "qapi-visit.h" +#include "qapi/opts-visitor.h" +#include "qapi/dealloc-visitor.h" +#include "qapi/qmp/qerror.h" +#include "hw/boards.h" +#include "sysemu/hostmem.h" +#include "qmp-commands.h" + +QemuOptsList qemu_numa_opts = { + .name = "numa", + .implied_opt_name = "type", + .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head), + .desc = { { 0 } } /* validated with OptsVisitor */ +}; + +static int have_memdevs = -1; + +static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) +{ + uint16_t nodenr; + uint16List *cpus = NULL; + + if (node->has_nodeid) { + nodenr = node->nodeid; + } else { + nodenr = nb_numa_nodes; + } + + if (nodenr >= MAX_NODES) { + error_setg(errp, "Max number of NUMA nodes reached: %" + PRIu16 "\n", nodenr); + return; + } + + for (cpus = node->cpus; cpus; cpus = cpus->next) { + if (cpus->value > MAX_CPUMASK_BITS) { + error_setg(errp, "CPU number %" PRIu16 " is bigger than %d", + cpus->value, MAX_CPUMASK_BITS); + return; + } + bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1); + } + + if (node->has_mem && node->has_memdev) { + error_setg(errp, "qemu: cannot specify both mem= and memdev=\n"); + return; + } + + if (have_memdevs == -1) { + have_memdevs = node->has_memdev; + } + if (node->has_memdev != have_memdevs) { + error_setg(errp, "qemu: memdev option must be specified for either " + "all or no nodes\n"); + return; + } + + if (node->has_mem) { + uint64_t mem_size = node->mem; + const char *mem_str = qemu_opt_get(opts, "mem"); + /* Fix up legacy suffix-less format */ + if (g_ascii_isdigit(mem_str[strlen(mem_str) - 1])) { + mem_size <<= 20; + } + numa_info[nodenr].node_mem = mem_size; + } + if (node->has_memdev) { + Object *o; + o = object_resolve_path_type(node->memdev, TYPE_MEMORY_BACKEND, NULL); + if (!o) { + error_setg(errp, "memdev=%s is ambiguous", node->memdev); + return; + } + + object_ref(o); + numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL); + numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); + } +} + +int numa_init_func(QemuOpts *opts, void *opaque) +{ + NumaOptions *object = NULL; + Error *err = NULL; + + { + OptsVisitor *ov = opts_visitor_new(opts); + visit_type_NumaOptions(opts_get_visitor(ov), &object, NULL, &err); + opts_visitor_cleanup(ov); + } + + if (err) { + goto error; + } + + switch (object->kind) { + case NUMA_OPTIONS_KIND_NODE: + numa_node_parse(object->node, opts, &err); + if (err) { + goto error; + } + nb_numa_nodes++; + break; + default: + abort(); + } + + return 0; + +error: + qerror_report_err(err); + error_free(err); + + if (object) { + QapiDeallocVisitor *dv = qapi_dealloc_visitor_new(); + visit_type_NumaOptions(qapi_dealloc_get_visitor(dv), + &object, NULL, NULL); + qapi_dealloc_visitor_cleanup(dv); + } + + return -1; +} + +void set_numa_nodes(void) +{ + if (nb_numa_nodes > 0) { + uint64_t numa_total; + int i; + + if (nb_numa_nodes > MAX_NODES) { + nb_numa_nodes = MAX_NODES; + } + + /* If no memory size if given for any node, assume the default case + * and distribute the available memory equally across all nodes + */ + for (i = 0; i < nb_numa_nodes; i++) { + if (numa_info[i].node_mem != 0) { + break; + } + } + if (i == nb_numa_nodes) { + uint64_t usedmem = 0; + + /* On Linux, the each node's border has to be 8MB aligned, + * the final node gets the rest. + */ + for (i = 0; i < nb_numa_nodes - 1; i++) { + numa_info[i].node_mem = (ram_size / nb_numa_nodes) & + ~((1 << 23UL) - 1); + usedmem += numa_info[i].node_mem; + } + numa_info[i].node_mem = ram_size - usedmem; + } + + numa_total = 0; + for (i = 0; i < nb_numa_nodes; i++) { + numa_total += numa_info[i].node_mem; + } + if (numa_total != ram_size) { + error_report("total memory for NUMA nodes (%" PRIu64 ")" + " should equal RAM size (" RAM_ADDR_FMT ")", + numa_total, ram_size); + exit(1); + } + + for (i = 0; i < nb_numa_nodes; i++) { + if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) { + break; + } + } + /* assigning the VCPUs round-robin is easier to implement, guest OSes + * must cope with this anyway, because there are BIOSes out there in + * real machines which also use this scheme. + */ + if (i == nb_numa_nodes) { + for (i = 0; i < max_cpus; i++) { + set_bit(i, numa_info[i % nb_numa_nodes].node_cpu); + } + } + } +} + +void set_numa_modes(void) +{ + CPUState *cpu; + int i; + + CPU_FOREACH(cpu) { + for (i = 0; i < nb_numa_nodes; i++) { + if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) { + cpu->numa_node = i; + } + } + } +} + +static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, + const char *name, + uint64_t ram_size) +{ + if (mem_path) { +#ifdef __linux__ + Error *err = NULL; + memory_region_init_ram_from_file(mr, owner, name, ram_size, false, + mem_path, &err); + + /* Legacy behavior: if allocation failed, fall back to + * regular RAM allocation. + */ + if (err) { + qerror_report_err(err); + error_free(err); + memory_region_init_ram(mr, owner, name, ram_size); + } +#else + fprintf(stderr, "-mem-path not supported on this host\n"); + exit(1); +#endif + } else { + memory_region_init_ram(mr, owner, name, ram_size); + } + vmstate_register_ram_global(mr); +} + +void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, + const char *name, + uint64_t ram_size) +{ + uint64_t addr = 0; + int i; + + if (nb_numa_nodes == 0 || !have_memdevs) { + allocate_system_memory_nonnuma(mr, owner, name, ram_size); + return; + } + + memory_region_init(mr, owner, name, ram_size); + for (i = 0; i < MAX_NODES; i++) { + Error *local_err = NULL; + uint64_t size = numa_info[i].node_mem; + HostMemoryBackend *backend = numa_info[i].node_memdev; + if (!backend) { + continue; + } + MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err); + if (local_err) { + qerror_report_err(local_err); + exit(1); + } + + memory_region_add_subregion(mr, addr, seg); + vmstate_register_ram_global(seg); + addr += size; + } +} + +static int query_memdev(Object *obj, void *opaque) +{ + MemdevList **list = opaque; + Error *err = NULL; + + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { + MemdevList *m = g_malloc0(sizeof(*m)); + + m->value = g_malloc0(sizeof(*m->value)); + + m->value->size = object_property_get_int(obj, "size", + &err); + if (err) { + goto error; + } + + m->value->merge = object_property_get_bool(obj, "merge", + &err); + if (err) { + goto error; + } + + m->value->dump = object_property_get_bool(obj, "dump", + &err); + if (err) { + goto error; + } + + m->value->prealloc = object_property_get_bool(obj, + "prealloc", &err); + if (err) { + goto error; + } + + m->value->policy = object_property_get_enum(obj, + "policy", + HostMemPolicy_lookup, + &err); + if (err) { + goto error; + } + + object_property_get_uint16List(obj, "host-nodes", + &m->value->host_nodes, &err); + if (err) { + goto error; + } + + m->next = *list; + *list = m; + } + + return 0; +error: + return -1; +} + +MemdevList *qmp_query_memdev(Error **errp) +{ + Object *obj; + MemdevList *list = NULL, *m; + + obj = object_resolve_path("/objects", NULL); + if (obj == NULL) { + return NULL; + } + + if (object_child_foreach(obj, query_memdev, &list) != 0) { + goto error; + } + + return list; + +error: + while (list) { + m = list; + list = list->next; + g_free(m->value); + g_free(m); + } + return NULL; +} diff --git a/qapi-schema.json b/qapi-schema.json index dc2abe479e..98350048f6 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2069,6 +2069,22 @@ '*devname': 'str' } } ## +# @NetdevVhostUserOptions +# +# Vhost-user network backend +# +# @chardev: name of a unix socket chardev +# +# @vhostforce: #optional vhost on for non-MSIX virtio guests (default: false). +# +# Since 2.1 +## +{ 'type': 'NetdevVhostUserOptions', + 'data': { + 'chardev': 'str', + '*vhostforce': 'bool' } } + +## # @NetClientOptions # # A discriminated record of network device traits. @@ -2086,7 +2102,8 @@ 'dump': 'NetdevDumpOptions', 'bridge': 'NetdevBridgeOptions', 'hubport': 'NetdevHubPortOptions', - 'netmap': 'NetdevNetmapOptions' } } + 'netmap': 'NetdevNetmapOptions', + 'vhost-user': 'NetdevVhostUserOptions' } } ## # @NetLegacy @@ -3080,3 +3097,192 @@ 'btn' : 'InputBtnEvent', 'rel' : 'InputMoveEvent', 'abs' : 'InputMoveEvent' } } + +## +# @NumaOptions +# +# A discriminated record of NUMA options. (for OptsVisitor) +# +# Since 2.1 +## +{ 'union': 'NumaOptions', + 'data': { + 'node': 'NumaNodeOptions' }} + +## +# @NumaNodeOptions +# +# Create a guest NUMA node. (for OptsVisitor) +# +# @nodeid: #optional NUMA node ID (increase by 1 from 0 if omitted) +# +# @cpus: #optional VCPUs belonging to this node (assign VCPUS round-robin +# if omitted) +# +# @mem: #optional memory size of this node; mutually exclusive with @memdev. +# Equally divide total memory among nodes if both @mem and @memdev are +# omitted. +# +# @memdev: #optional memory backend object. If specified for one node, +# it must be specified for all nodes. +# +# Since: 2.1 +## +{ 'type': 'NumaNodeOptions', + 'data': { + '*nodeid': 'uint16', + '*cpus': ['uint16'], + '*mem': 'size', + '*memdev': 'str' }} + +## +# @HostMemPolicy +# +# Host memory policy types +# +# @default: restore default policy, remove any nondefault policy +# +# @preferred: set the preferred host nodes for allocation +# +# @bind: a strict policy that restricts memory allocation to the +# host nodes specified +# +# @interleave: memory allocations are interleaved across the set +# of host nodes specified +# +# Since 2.1 +## +{ 'enum': 'HostMemPolicy', + 'data': [ 'default', 'preferred', 'bind', 'interleave' ] } + +## +# @Memdev: +# +# Information of memory device +# +# @size: memory device size +# +# @merge: enables or disables memory merge support +# +# @dump: includes memory device's memory in a core dump or not +# +# @prealloc: enables or disables memory preallocation +# +# @host-nodes: host nodes for its memory policy +# +# @policy: memory policy of memory device +# +# Since: 2.1 +## + +{ 'type': 'Memdev', + 'data': { + 'size': 'size', + 'merge': 'bool', + 'dump': 'bool', + 'prealloc': 'bool', + 'host-nodes': ['uint16'], + 'policy': 'HostMemPolicy' }} + +## +# @query-memdev: +# +# Returns information for all memory devices. +# +# Returns: a list of @Memdev. +# +# Since: 2.1 +## +{ 'command': 'query-memdev', 'returns': ['Memdev'] } +# @PCDIMMDeviceInfo: +# +# PCDIMMDevice state information +# +# @id: #optional device's ID +# +# @addr: physical address, where device is mapped +# +# @size: size of memory that the device provides +# +# @slot: slot number at which device is plugged in +# +# @node: NUMA node number where device is plugged in +# +# @memdev: memory backend linked with device +# +# @hotplugged: true if device was hotplugged +# +# @hotpluggable: true if device if could be added/removed while machine is running +# +# Since: 2.1 +## +{ 'type': 'PCDIMMDeviceInfo', + 'data': { '*id': 'str', + 'addr': 'int', + 'size': 'int', + 'slot': 'int', + 'node': 'int', + 'memdev': 'str', + 'hotplugged': 'bool', + 'hotpluggable': 'bool' + } +} + +## +# @MemoryDeviceInfo: +# +# Union containing information about a memory device +# +# Since: 2.1 +## +{ 'union': 'MemoryDeviceInfo', 'data': {'dimm': 'PCDIMMDeviceInfo'} } + +## +# @query-memory-devices +# +# Lists available memory devices and their state +# +# Since: 2.1 +## +{ 'command': 'query-memory-devices', 'returns': ['MemoryDeviceInfo'] } + +## @ACPISlotType +# +# @DIMM: memory slot +# +{ 'enum': 'ACPISlotType', 'data': [ 'DIMM' ] } + +## @ACPIOSTInfo +# +# OSPM Status Indication for a device +# For description of possible values of @source and @status fields +# see "_OST (OSPM Status Indication)" chapter of ACPI5.0 spec. +# +# @device: #optional device ID associated with slot +# +# @slot: slot ID, unique per slot of a given @slot-type +# +# @slot-type: type of the slot +# +# @source: an integer containing the source event +# +# @status: an integer containing the status code +# +# Since: 2.1 +## +{ 'type': 'ACPIOSTInfo', + 'data' : { '*device': 'str', + 'slot': 'str', + 'slot-type': 'ACPISlotType', + 'source': 'int', + 'status': 'int' } } + +## +# @query-acpi-ospm-status +# +# Lists ACPI OSPM status of ACPI device objects, +# which might be reported via _OST method +# +# Since: 2.1 +## +{ 'command': 'query-acpi-ospm-status', 'returns': ['ACPIOSTInfo'] } diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c index 5780944792..d8a8db02ed 100644 --- a/qapi/string-input-visitor.c +++ b/qapi/string-input-visitor.c @@ -15,31 +15,210 @@ #include "qapi/visitor-impl.h" #include "qapi/qmp/qerror.h" #include "qemu/option.h" +#include "qemu/queue.h" +#include "qemu/range.h" + struct StringInputVisitor { Visitor visitor; + + bool head; + + GList *ranges; + GList *cur_range; + int64_t cur; + const char *string; }; +static void free_range(void *range, void *dummy) +{ + g_free(range); +} + +static void parse_str(StringInputVisitor *siv, Error **errp) +{ + char *str = (char *) siv->string; + long long start, end; + Range *cur; + char *endptr; + + if (siv->ranges) { + return; + } + + do { + errno = 0; + start = strtoll(str, &endptr, 0); + if (errno == 0 && endptr > str) { + if (*endptr == '\0') { + cur = g_malloc0(sizeof(*cur)); + cur->begin = start; + cur->end = start + 1; + siv->ranges = g_list_insert_sorted_merged(siv->ranges, cur, + range_compare); + cur = NULL; + str = NULL; + } else if (*endptr == '-') { + str = endptr + 1; + errno = 0; + end = strtoll(str, &endptr, 0); + if (errno == 0 && endptr > str && start <= end && + (start > INT64_MAX - 65536 || + end < start + 65536)) { + if (*endptr == '\0') { + cur = g_malloc0(sizeof(*cur)); + cur->begin = start; + cur->end = end + 1; + siv->ranges = + g_list_insert_sorted_merged(siv->ranges, + cur, + range_compare); + cur = NULL; + str = NULL; + } else if (*endptr == ',') { + str = endptr + 1; + cur = g_malloc0(sizeof(*cur)); + cur->begin = start; + cur->end = end + 1; + siv->ranges = + g_list_insert_sorted_merged(siv->ranges, + cur, + range_compare); + cur = NULL; + } else { + goto error; + } + } else { + goto error; + } + } else if (*endptr == ',') { + str = endptr + 1; + cur = g_malloc0(sizeof(*cur)); + cur->begin = start; + cur->end = start + 1; + siv->ranges = g_list_insert_sorted_merged(siv->ranges, + cur, + range_compare); + cur = NULL; + } else { + goto error; + } + } else { + goto error; + } + } while (str); + + return; +error: + g_list_foreach(siv->ranges, free_range, NULL); + g_list_free(siv->ranges); + siv->ranges = NULL; +} + +static void +start_list(Visitor *v, const char *name, Error **errp) +{ + StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v); + + parse_str(siv, errp); + + siv->cur_range = g_list_first(siv->ranges); + if (siv->cur_range) { + Range *r = siv->cur_range->data; + if (r) { + siv->cur = r->begin; + } + } +} + +static GenericList * +next_list(Visitor *v, GenericList **list, Error **errp) +{ + StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v); + GenericList **link; + Range *r; + + if (!siv->ranges || !siv->cur_range) { + return NULL; + } + + r = siv->cur_range->data; + if (!r) { + return NULL; + } + + if (siv->cur < r->begin || siv->cur >= r->end) { + siv->cur_range = g_list_next(siv->cur_range); + if (!siv->cur_range) { + return NULL; + } + r = siv->cur_range->data; + if (!r) { + return NULL; + } + siv->cur = r->begin; + } + + if (siv->head) { + link = list; + siv->head = false; + } else { + link = &(*list)->next; + } + + *link = g_malloc0(sizeof **link); + return *link; +} + +static void +end_list(Visitor *v, Error **errp) +{ + StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v); + siv->head = true; +} + static void parse_type_int(Visitor *v, int64_t *obj, const char *name, Error **errp) { StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v); - char *endp = (char *) siv->string; - long long val; - errno = 0; - if (siv->string) { - val = strtoll(siv->string, &endp, 0); - } - if (!siv->string || errno || endp == siv->string || *endp) { + if (!siv->string) { error_set(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null", "integer"); return; } - *obj = val; + parse_str(siv, errp); + + if (!siv->ranges) { + goto error; + } + + if (!siv->cur_range) { + Range *r; + + siv->cur_range = g_list_first(siv->ranges); + if (!siv->cur_range) { + goto error; + } + + r = siv->cur_range->data; + if (!r) { + goto error; + } + + siv->cur = r->begin; + } + + *obj = siv->cur; + siv->cur++; + return; + +error: + error_set(errp, QERR_INVALID_PARAMETER_VALUE, name, + "an int64 value or range"); } static void parse_type_size(Visitor *v, uint64_t *obj, const char *name, @@ -140,6 +319,8 @@ Visitor *string_input_get_visitor(StringInputVisitor *v) void string_input_visitor_cleanup(StringInputVisitor *v) { + g_list_foreach(v->ranges, free_range, NULL); + g_list_free(v->ranges); g_free(v); } @@ -155,8 +336,12 @@ StringInputVisitor *string_input_visitor_new(const char *str) v->visitor.type_bool = parse_type_bool; v->visitor.type_str = parse_type_str; v->visitor.type_number = parse_type_number; + v->visitor.start_list = start_list; + v->visitor.next_list = next_list; + v->visitor.end_list = end_list; v->visitor.optional = parse_optional; v->string = str; + v->head = true; return v; } diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c index fb1d2e806d..e9aca3bfdc 100644 --- a/qapi/string-output-visitor.c +++ b/qapi/string-output-visitor.c @@ -16,32 +16,181 @@ #include "qapi/qmp/qerror.h" #include "qemu/host-utils.h" #include <math.h> +#include "qemu/range.h" + +enum ListMode { + LM_NONE, /* not traversing a list of repeated options */ + LM_STARTED, /* start_list() succeeded */ + + LM_IN_PROGRESS, /* next_list() has been called. + * + * Generating the next list link will consume the most + * recently parsed QemuOpt instance of the repeated + * option. + * + * Parsing a value into the list link will examine the + * next QemuOpt instance of the repeated option, and + * possibly enter LM_SIGNED_INTERVAL or + * LM_UNSIGNED_INTERVAL. + */ + + LM_SIGNED_INTERVAL, /* next_list() has been called. + * + * Generating the next list link will consume the most + * recently stored element from the signed interval, + * parsed from the most recent QemuOpt instance of the + * repeated option. This may consume QemuOpt itself + * and return to LM_IN_PROGRESS. + * + * Parsing a value into the list link will store the + * next element of the signed interval. + */ + + LM_UNSIGNED_INTERVAL,/* Same as above, only for an unsigned interval. */ + + LM_END +}; + +typedef enum ListMode ListMode; struct StringOutputVisitor { Visitor visitor; bool human; - char *string; + GString *string; + bool head; + ListMode list_mode; + union { + int64_t s; + uint64_t u; + } range_start, range_end; + GList *ranges; }; static void string_output_set(StringOutputVisitor *sov, char *string) { - g_free(sov->string); - sov->string = string; + if (sov->string) { + g_string_free(sov->string, true); + } + sov->string = g_string_new(string); + g_free(string); +} + +static void string_output_append(StringOutputVisitor *sov, int64_t a) +{ + Range *r = g_malloc0(sizeof(*r)); + r->begin = a; + r->end = a + 1; + sov->ranges = g_list_insert_sorted_merged(sov->ranges, r, range_compare); +} + +static void string_output_append_range(StringOutputVisitor *sov, + int64_t s, int64_t e) +{ + Range *r = g_malloc0(sizeof(*r)); + r->begin = s; + r->end = e + 1; + sov->ranges = g_list_insert_sorted_merged(sov->ranges, r, range_compare); +} + +static void format_string(StringOutputVisitor *sov, Range *r, bool next, + bool human) +{ + if (r->end - r->begin > 1) { + if (human) { + g_string_append_printf(sov->string, "0x%" PRIx64 "-%" PRIx64, + r->begin, r->end - 1); + + } else { + g_string_append_printf(sov->string, "%" PRId64 "-%" PRId64, + r->begin, r->end - 1); + } + } else { + if (human) { + g_string_append_printf(sov->string, "0x%" PRIx64, r->begin); + } else { + g_string_append_printf(sov->string, "%" PRId64, r->begin); + } + } + if (next) { + g_string_append(sov->string, ","); + } } static void print_type_int(Visitor *v, int64_t *obj, const char *name, Error **errp) { StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v); - char *out; + GList *l; + + switch (sov->list_mode) { + case LM_NONE: + string_output_append(sov, *obj); + break; + + case LM_STARTED: + sov->range_start.s = *obj; + sov->range_end.s = *obj; + sov->list_mode = LM_IN_PROGRESS; + return; + + case LM_IN_PROGRESS: + if (sov->range_end.s + 1 == *obj) { + sov->range_end.s++; + } else { + if (sov->range_start.s == sov->range_end.s) { + string_output_append(sov, sov->range_end.s); + } else { + assert(sov->range_start.s < sov->range_end.s); + string_output_append_range(sov, sov->range_start.s, + sov->range_end.s); + } + + sov->range_start.s = *obj; + sov->range_end.s = *obj; + } + return; + + case LM_END: + if (sov->range_end.s + 1 == *obj) { + sov->range_end.s++; + assert(sov->range_start.s < sov->range_end.s); + string_output_append_range(sov, sov->range_start.s, + sov->range_end.s); + } else { + if (sov->range_start.s == sov->range_end.s) { + string_output_append(sov, sov->range_end.s); + } else { + assert(sov->range_start.s < sov->range_end.s); + + string_output_append_range(sov, sov->range_start.s, + sov->range_end.s); + } + string_output_append(sov, *obj); + } + break; + + default: + abort(); + } + + l = sov->ranges; + while (l) { + Range *r = l->data; + format_string(sov, r, l->next != NULL, false); + l = l->next; + } if (sov->human) { - out = g_strdup_printf("%lld (%#llx)", (long long) *obj, (long long) *obj); - } else { - out = g_strdup_printf("%lld", (long long) *obj); + l = sov->ranges; + g_string_append(sov->string, " ("); + while (l) { + Range *r = l->data; + format_string(sov, r, l->next != NULL, true); + l = l->next; + } + g_string_append(sov->string, ")"); } - string_output_set(sov, out); } static void print_type_size(Visitor *v, uint64_t *obj, const char *name, @@ -103,9 +252,61 @@ static void print_type_number(Visitor *v, double *obj, const char *name, string_output_set(sov, g_strdup_printf("%f", *obj)); } +static void +start_list(Visitor *v, const char *name, Error **errp) +{ + StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v); + + /* we can't traverse a list in a list */ + assert(sov->list_mode == LM_NONE); + sov->list_mode = LM_STARTED; + sov->head = true; +} + +static GenericList * +next_list(Visitor *v, GenericList **list, Error **errp) +{ + StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v); + GenericList *ret = NULL; + if (*list) { + if (sov->head) { + ret = *list; + } else { + ret = (*list)->next; + } + + if (sov->head) { + if (ret && ret->next == NULL) { + sov->list_mode = LM_NONE; + } + sov->head = false; + } else { + if (ret && ret->next == NULL) { + sov->list_mode = LM_END; + } + } + } + + return ret; +} + +static void +end_list(Visitor *v, Error **errp) +{ + StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v); + + assert(sov->list_mode == LM_STARTED || + sov->list_mode == LM_END || + sov->list_mode == LM_NONE || + sov->list_mode == LM_IN_PROGRESS); + sov->list_mode = LM_NONE; + sov->head = true; + +} + char *string_output_get_string(StringOutputVisitor *sov) { - char *string = sov->string; + char *string = g_string_free(sov->string, false); sov->string = NULL; return string; } @@ -115,9 +316,19 @@ Visitor *string_output_get_visitor(StringOutputVisitor *sov) return &sov->visitor; } +static void free_range(void *range, void *dummy) +{ + g_free(range); +} + void string_output_visitor_cleanup(StringOutputVisitor *sov) { - g_free(sov->string); + if (sov->string) { + g_string_free(sov->string, true); + } + + g_list_foreach(sov->ranges, free_range, NULL); + g_list_free(sov->ranges); g_free(sov); } @@ -127,6 +338,7 @@ StringOutputVisitor *string_output_visitor_new(bool human) v = g_malloc0(sizeof(*v)); + v->string = g_string_new(NULL); v->human = human; v->visitor.type_enum = output_type_enum; v->visitor.type_int = print_type_int; @@ -134,6 +346,9 @@ StringOutputVisitor *string_output_visitor_new(bool human) v->visitor.type_bool = print_type_bool; v->visitor.type_str = print_type_str; v->visitor.type_number = print_type_number; + v->visitor.start_list = start_list; + v->visitor.next_list = next_list; + v->visitor.end_list = end_list; return v; } diff --git a/qemu-char.c b/qemu-char.c index f918f90972..b3bd3b5af4 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -84,6 +84,7 @@ #include "ui/qemu-spice.h" #define READ_BUF_LEN 4096 +#define READ_RETRIES 10 /***********************************************************/ /* character device */ @@ -145,6 +146,41 @@ int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len) return offset; } +int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len) +{ + int offset = 0, counter = 10; + int res; + + if (!s->chr_sync_read) { + return 0; + } + + while (offset < len) { + do { + res = s->chr_sync_read(s, buf + offset, len - offset); + if (res == -1 && errno == EAGAIN) { + g_usleep(100); + } + } while (res == -1 && errno == EAGAIN); + + if (res == 0) { + break; + } + + if (res < 0) { + return res; + } + + offset += res; + + if (!counter--) { + break; + } + } + + return offset; +} + int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg) { if (!s->chr_ioctl) @@ -168,7 +204,18 @@ void qemu_chr_be_write(CharDriverState *s, uint8_t *buf, int len) int qemu_chr_fe_get_msgfd(CharDriverState *s) { - return s->get_msgfd ? s->get_msgfd(s) : -1; + int fd; + return (qemu_chr_fe_get_msgfds(s, &fd, 1) >= 0) ? fd : -1; +} + +int qemu_chr_fe_get_msgfds(CharDriverState *s, int *fds, int len) +{ + return s->get_msgfds ? s->get_msgfds(s, fds, len) : -1; +} + +int qemu_chr_fe_set_msgfds(CharDriverState *s, int *fds, int num) +{ + return s->set_msgfds ? s->set_msgfds(s, fds, num) : -1; } int qemu_chr_add_client(CharDriverState *s, int fd) @@ -2296,16 +2343,73 @@ typedef struct { int do_telnetopt; int do_nodelay; int is_unix; - int msgfd; + int *read_msgfds; + int read_msgfds_num; + int *write_msgfds; + int write_msgfds_num; } TCPCharDriver; static gboolean tcp_chr_accept(GIOChannel *chan, GIOCondition cond, void *opaque); +#ifndef _WIN32 +static int unix_send_msgfds(CharDriverState *chr, const uint8_t *buf, int len) +{ + TCPCharDriver *s = chr->opaque; + struct msghdr msgh; + struct iovec iov; + int r; + + size_t fd_size = s->write_msgfds_num * sizeof(int); + char control[CMSG_SPACE(fd_size)]; + struct cmsghdr *cmsg; + + memset(&msgh, 0, sizeof(msgh)); + memset(control, 0, sizeof(control)); + + /* set the payload */ + iov.iov_base = (uint8_t *) buf; + iov.iov_len = len; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msgh); + + cmsg->cmsg_len = CMSG_LEN(fd_size); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), s->write_msgfds, fd_size); + + do { + r = sendmsg(s->fd, &msgh, 0); + } while (r < 0 && errno == EINTR); + + /* free the written msgfds, no matter what */ + if (s->write_msgfds_num) { + g_free(s->write_msgfds); + s->write_msgfds = 0; + s->write_msgfds_num = 0; + } + + return r; +} +#endif + static int tcp_chr_write(CharDriverState *chr, const uint8_t *buf, int len) { TCPCharDriver *s = chr->opaque; if (s->connected) { - return io_channel_send(s->chan, buf, len); +#ifndef _WIN32 + if (s->is_unix && s->write_msgfds_num) { + return unix_send_msgfds(chr, buf, len); + } else +#endif + { + return io_channel_send(s->chan, buf, len); + } } else { /* XXX: indicate an error ? */ return len; @@ -2372,12 +2476,39 @@ static void tcp_chr_process_IAC_bytes(CharDriverState *chr, *size = j; } -static int tcp_get_msgfd(CharDriverState *chr) +static int tcp_get_msgfds(CharDriverState *chr, int *fds, int num) { TCPCharDriver *s = chr->opaque; - int fd = s->msgfd; - s->msgfd = -1; - return fd; + int to_copy = (s->read_msgfds_num < num) ? s->read_msgfds_num : num; + + if (to_copy) { + memcpy(fds, s->read_msgfds, to_copy * sizeof(int)); + + g_free(s->read_msgfds); + s->read_msgfds = 0; + s->read_msgfds_num = 0; + } + + return to_copy; +} + +static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num) +{ + TCPCharDriver *s = chr->opaque; + + /* clear old pending fd array */ + if (s->write_msgfds) { + g_free(s->write_msgfds); + } + + if (num) { + s->write_msgfds = g_malloc(num * sizeof(int)); + memcpy(s->write_msgfds, fds, num * sizeof(int)); + } + + s->write_msgfds_num = num; + + return 0; } #ifndef _WIN32 @@ -2387,26 +2518,46 @@ static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg) struct cmsghdr *cmsg; for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { - int fd; + int fd_size, i; - if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)) || + if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) || cmsg->cmsg_level != SOL_SOCKET || - cmsg->cmsg_type != SCM_RIGHTS) + cmsg->cmsg_type != SCM_RIGHTS) { continue; + } + + fd_size = cmsg->cmsg_len - CMSG_LEN(0); - fd = *((int *)CMSG_DATA(cmsg)); - if (fd < 0) + if (!fd_size) { continue; + } + + /* close and clean read_msgfds */ + for (i = 0; i < s->read_msgfds_num; i++) { + close(s->read_msgfds[i]); + } + + if (s->read_msgfds_num) { + g_free(s->read_msgfds); + } - /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */ - qemu_set_block(fd); + s->read_msgfds_num = fd_size / sizeof(int); + s->read_msgfds = g_malloc(fd_size); + memcpy(s->read_msgfds, CMSG_DATA(cmsg), fd_size); -#ifndef MSG_CMSG_CLOEXEC - qemu_set_cloexec(fd); -#endif - if (s->msgfd != -1) - close(s->msgfd); - s->msgfd = fd; + for (i = 0; i < s->read_msgfds_num; i++) { + int fd = s->read_msgfds[i]; + if (fd < 0) { + continue; + } + + /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */ + qemu_set_block(fd); + + #ifndef MSG_CMSG_CLOEXEC + qemu_set_cloexec(fd); + #endif + } } } @@ -2454,6 +2605,23 @@ static GSource *tcp_chr_add_watch(CharDriverState *chr, GIOCondition cond) return g_io_create_watch(s->chan, cond); } +static void tcp_chr_disconnect(CharDriverState *chr) +{ + TCPCharDriver *s = chr->opaque; + + s->connected = 0; + if (s->listen_chan) { + s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, + tcp_chr_accept, chr); + } + remove_fd_in_watch(chr); + g_io_channel_unref(s->chan); + s->chan = NULL; + closesocket(s->fd); + s->fd = -1; + qemu_chr_be_event(chr, CHR_EVENT_CLOSED); +} + static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) { CharDriverState *chr = opaque; @@ -2470,16 +2638,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) size = tcp_chr_recv(chr, (void *)buf, len); if (size == 0) { /* connection closed */ - s->connected = 0; - if (s->listen_chan) { - s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, tcp_chr_accept, chr); - } - remove_fd_in_watch(chr); - g_io_channel_unref(s->chan); - s->chan = NULL; - closesocket(s->fd); - s->fd = -1; - qemu_chr_be_event(chr, CHR_EVENT_CLOSED); + tcp_chr_disconnect(chr); } else if (size > 0) { if (s->do_telnetopt) tcp_chr_process_IAC_bytes(chr, s, buf, &size); @@ -2490,6 +2649,24 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) return TRUE; } +static int tcp_chr_sync_read(CharDriverState *chr, const uint8_t *buf, int len) +{ + TCPCharDriver *s = chr->opaque; + int size; + + if (!s->connected) { + return 0; + } + + size = tcp_chr_recv(chr, (void *) buf, len); + if (size == 0) { + /* connection closed */ + tcp_chr_disconnect(chr); + } + + return size; +} + #ifndef _WIN32 CharDriverState *qemu_chr_open_eventfd(int eventfd) { @@ -2503,6 +2680,25 @@ CharDriverState *qemu_chr_open_eventfd(int eventfd) } #endif +static gboolean tcp_chr_chan_close(GIOChannel *channel, GIOCondition cond, + void *opaque) +{ + CharDriverState *chr = opaque; + + if (cond != G_IO_HUP) { + return FALSE; + } + + /* connection closed */ + tcp_chr_disconnect(chr); + if (chr->fd_hup_tag) { + g_source_remove(chr->fd_hup_tag); + chr->fd_hup_tag = 0; + } + + return TRUE; +} + static void tcp_chr_connect(void *opaque) { CharDriverState *chr = opaque; @@ -2512,6 +2708,8 @@ static void tcp_chr_connect(void *opaque) if (s->chan) { chr->fd_in_tag = io_add_watch_poll(s->chan, tcp_chr_read_poll, tcp_chr_read, chr); + chr->fd_hup_tag = g_io_add_watch(s->chan, G_IO_HUP, tcp_chr_chan_close, + chr); } qemu_chr_be_generic_open(chr); } @@ -2604,6 +2802,7 @@ static gboolean tcp_chr_accept(GIOChannel *channel, GIOCondition cond, void *opa static void tcp_chr_close(CharDriverState *chr) { TCPCharDriver *s = chr->opaque; + int i; if (s->fd >= 0) { remove_fd_in_watch(chr); if (s->chan) { @@ -2621,6 +2820,15 @@ static void tcp_chr_close(CharDriverState *chr) } closesocket(s->listen_fd); } + if (s->read_msgfds_num) { + for (i = 0; i < s->read_msgfds_num; i++) { + close(s->read_msgfds[i]); + } + g_free(s->read_msgfds); + } + if (s->write_msgfds_num) { + g_free(s->write_msgfds); + } g_free(s); qemu_chr_be_event(chr, CHR_EVENT_CLOSED); } @@ -2649,7 +2857,10 @@ static CharDriverState *qemu_chr_open_socket_fd(int fd, bool do_nodelay, s->connected = 0; s->fd = -1; s->listen_fd = -1; - s->msgfd = -1; + s->read_msgfds = 0; + s->read_msgfds_num = 0; + s->write_msgfds = 0; + s->write_msgfds_num = 0; chr->filename = g_malloc(256); switch (ss.ss_family) { @@ -2678,8 +2889,10 @@ static CharDriverState *qemu_chr_open_socket_fd(int fd, bool do_nodelay, chr->opaque = s; chr->chr_write = tcp_chr_write; + chr->chr_sync_read = tcp_chr_sync_read; chr->chr_close = tcp_chr_close; - chr->get_msgfd = tcp_get_msgfd; + chr->get_msgfds = tcp_get_msgfds; + chr->set_msgfds = tcp_set_msgfds; chr->chr_add_client = tcp_chr_add_client; chr->chr_add_watch = tcp_chr_add_watch; chr->chr_update_read_handler = tcp_chr_update_read_handler; diff --git a/qemu-options.hx b/qemu-options.hx index d0714c43a6..ca75760b27 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -95,12 +95,22 @@ specifies the maximum number of hotpluggable CPUs. ETEXI DEF("numa", HAS_ARG, QEMU_OPTION_numa, - "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL) + "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n" + "-numa node[,memdev=id][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL) STEXI -@item -numa @var{opts} +@item -numa node[,mem=@var{size}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}] +@item -numa node[,memdev=@var{id}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}] @findex -numa -Simulate a multi node NUMA system. If mem and cpus are omitted, resources -are split equally. +Simulate a multi node NUMA system. If @samp{mem}, @samp{memdev} +and @samp{cpus} are omitted, resources are split equally. Also, note +that the -@option{numa} option doesn't allocate any of the specified +resources. That is, it just assigns existing resources to NUMA nodes. This +means that one still has to use the @option{-m}, @option{-smp} options +to allocate RAM and VCPUs respectively, and possibly @option{-object} +to specify the memory backend for the @samp{memdev} suboption. + +@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, if one +node uses @samp{memdev}, all of them have to use it. ETEXI DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd, @@ -210,17 +220,20 @@ use is discouraged as it may be removed from future versions. ETEXI DEF("m", HAS_ARG, QEMU_OPTION_m, - "-m [size=]megs\n" + "-m[emory] [size=]megs[,slots=n,maxmem=size]\n" " configure guest RAM\n" " size: initial amount of guest memory (default: " - stringify(DEFAULT_RAM_SIZE) "MiB)\n", + stringify(DEFAULT_RAM_SIZE) "MiB)\n" + " slots: number of hotplug slots (default: none)\n" + " maxmem: maximum amount of guest memory (default: none)\n", QEMU_ARCH_ALL) STEXI @item -m [size=]@var{megs} @findex -m Set virtual RAM size to @var{megs} megabytes. Default is 128 MiB. Optionally, a suffix of ``M'' or ``G'' can be used to signify a value in megabytes or -gigabytes respectively. +gigabytes respectively. Optional pair @var{slots}, @var{maxmem} could be used +to set amount of hotluggable memory slots and possible maximum amount of memory. ETEXI DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath, @@ -1457,6 +1470,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, #ifdef CONFIG_NETMAP "netmap|" #endif + "vhost-user|" "socket|" "hubport],id=str[,option][,option][,...]\n", QEMU_ARCH_ALL) STEXI @@ -1788,6 +1802,23 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single netdev. @code{-net} and @code{-device} with parameter @option{vlan} create the required hub automatically. +@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off] + +Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should +be a unix domain socket backed one. The vhost-user uses a specifically defined +protocol to pass vhost ioctl replacement messages to an application on the other +end of the socket. On non-MSIX guests, the feature can be forced with +@var{vhostforce}. + +Example: +@example +qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ + -numa node,memdev=mem \ + -chardev socket,path=/path/to/socket \ + -netdev type=vhost-user,id=net0,chardev=chr0 \ + -device virtio-net-pci,netdev=net0 +@end example + @item -net dump[,vlan=@var{n}][,file=@var{file}][,len=@var{len}] Dump network traffic on VLAN @var{n} to file @var{file} (@file{qemu-vlan0.pcap} by default). At most @var{len} bytes (64k by default) per packet are stored. The file format is diff --git a/qmp-commands.hx b/qmp-commands.hx index d6bb0f483f..e4a1c80434 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -3571,6 +3571,93 @@ Example: "format":"qcow2", "virtual-size":2048000 } - } }Â ] } + } } ] } EQMP + + { + .name = "query-memdev", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_memdev, + }, + +SQMP +query-memdev +------------ + +Show memory devices information. + + +Example (1): + +-> { "execute": "query-memdev" } +<- { "return": [ + { + "size": 536870912, + "merge": false, + "dump": true, + "prealloc": false, + "host-nodes": [0, 1], + "policy": "bind" + }, + { + "size": 536870912, + "merge": false, + "dump": true, + "prealloc": true, + "host-nodes": [2, 3], + "policy": "preferred" + } + ] + } + +EQMP + + { + .name = "query-memory-devices", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_memory_devices, + }, + +SQMP +@query-memory-devices +-------------------- + +Return a list of memory devices. + +Example: +-> { "execute": "query-memory-devices" } +<- { "return": [ { "data": + { "addr": 5368709120, + "hotpluggable": true, + "hotplugged": true, + "id": "d1", + "memdev": "/objects/memX", + "node": 0, + "size": 1073741824, + "slot": 0}, + "type": "dimm" + } ] } +EQMP + + { + .name = "query-acpi-ospm-status", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_acpi_ospm_status, + }, + +SQMP +@query-acpi-ospm-status +-------------------- + +Return list of ACPIOSTInfo for devices that support status reporting +via ACPI _OST method. + +Example: +-> { "execute": "query-acpi-ospm-status" } +<- { "return": [ { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0}, + { "slot": "1", "slot-type": "DIMM", "source": 0, "status": 0}, + { "slot": "2", "slot-type": "DIMM", "source": 0, "status": 0}, + { "slot": "3", "slot-type": "DIMM", "source": 0, "status": 0} + ]} +EQMP @@ -28,6 +28,8 @@ #include "qapi/qmp-input-visitor.h" #include "hw/boards.h" #include "qom/object_interfaces.h" +#include "hw/mem/pc-dimm.h" +#include "hw/acpi/acpi_dev_interface.h" NameInfo *qmp_query_name(Error **errp) { @@ -540,7 +542,7 @@ void object_add(const char *type, const char *id, const QDict *qdict, klass = object_class_by_name(type); if (!klass) { - error_setg(errp, "invalid class name"); + error_setg(errp, "invalid object type: %s", type); return; } @@ -565,13 +567,18 @@ void object_add(const char *type, const char *id, const QDict *qdict, } } - user_creatable_complete(obj, &local_err); + object_property_add_child(container_get(object_get_root(), "/objects"), + id, obj, &local_err); if (local_err) { goto out; } - object_property_add_child(container_get(object_get_root(), "/objects"), - id, obj, &local_err); + user_creatable_complete(obj, &local_err); + if (local_err) { + object_property_del(container_get(object_get_root(), "/objects"), + id, &error_abort); + goto out; + } out: if (local_err) { error_propagate(errp, local_err); @@ -623,3 +630,32 @@ void qmp_object_del(const char *id, Error **errp) } object_unparent(obj); } + +MemoryDeviceInfoList *qmp_query_memory_devices(Error **errp) +{ + MemoryDeviceInfoList *head = NULL; + MemoryDeviceInfoList **prev = &head; + + qmp_pc_dimm_device_list(qdev_get_machine(), &prev); + + return head; +} + +ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp) +{ + bool ambig; + ACPIOSTInfoList *head = NULL; + ACPIOSTInfoList **prev = &head; + Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, &ambig); + + if (obj) { + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj); + AcpiDeviceIf *adev = ACPI_DEVICE_IF(obj); + + adevc->ospm_status(adev, &prev); + } else { + error_setg(errp, "command is not supported, missing ACPI device"); + } + + return head; +} diff --git a/qom/object.c b/qom/object.c index e42b254303..3876618c2e 100644 --- a/qom/object.c +++ b/qom/object.c @@ -13,6 +13,7 @@ #include "qom/object.h" #include "qemu-common.h" #include "qapi/visitor.h" +#include "qapi-visit.h" #include "qapi/string-input-visitor.h" #include "qapi/string-output-visitor.h" #include "qapi/qmp/qerror.h" @@ -938,6 +939,40 @@ int64_t object_property_get_int(Object *obj, const char *name, return retval; } +int object_property_get_enum(Object *obj, const char *name, + const char *strings[], Error **errp) +{ + StringOutputVisitor *sov; + StringInputVisitor *siv; + int ret; + + sov = string_output_visitor_new(false); + object_property_get(obj, string_output_get_visitor(sov), name, errp); + siv = string_input_visitor_new(string_output_get_string(sov)); + string_output_visitor_cleanup(sov); + visit_type_enum(string_input_get_visitor(siv), + &ret, strings, NULL, name, errp); + string_input_visitor_cleanup(siv); + + return ret; +} + +void object_property_get_uint16List(Object *obj, const char *name, + uint16List **list, Error **errp) +{ + StringOutputVisitor *ov; + StringInputVisitor *iv; + + ov = string_output_visitor_new(false); + object_property_get(obj, string_output_get_visitor(ov), + name, errp); + iv = string_input_visitor_new(string_output_get_string(ov)); + visit_type_uint16List(string_input_get_visitor(iv), + list, NULL, errp); + string_output_visitor_cleanup(ov); + string_input_visitor_cleanup(iv); +} + void object_property_parse(Object *obj, const char *string, const char *name, Error **errp) { @@ -42,7 +42,6 @@ #include "block/snapshot.h" #include "block/qapi.h" -#define SELF_ANNOUNCE_ROUNDS 5 #ifndef ETH_P_RARP #define ETH_P_RARP 0x8035 @@ -98,7 +97,7 @@ static void qemu_announce_self_once(void *opaque) if (--count) { /* delay 50ms, 150ms, 250ms, ... */ timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + - 50 + (SELF_ANNOUNCE_ROUNDS - count - 1) * 100); + self_announce_delay(count)); } else { timer_del(timer); timer_free(timer); diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index d99e2b9259..997d68d5b9 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -1,4 +1,6 @@ stub-obj-y += arch-query-cpu-def.o +stub-obj-y += bdrv-commit-all.o +stub-obj-y += chr-msmouse.o stub-obj-y += clock-warp.o stub-obj-y += cpu-get-clock.o stub-obj-y += cpu-get-icount.o @@ -9,13 +11,18 @@ stub-obj-y += fdset-get-fd.o stub-obj-y += fdset-remove-fd.o stub-obj-y += gdbstub.o stub-obj-y += get-fd.o +stub-obj-y += get-next-serial.o stub-obj-y += get-vm-name.o stub-obj-y += iothread-lock.o +stub-obj-y += is-daemonized.o +stub-obj-y += machine-init-done.o stub-obj-y += migr-blocker.o stub-obj-y += mon-is-qmp.o stub-obj-y += mon-printf.o stub-obj-y += mon-protocol-event.o stub-obj-y += mon-set-error.o +stub-obj-y += monitor-init.o +stub-obj-y += notify-event.o stub-obj-y += pci-drive-hot-add.o stub-obj-y += qtest.o stub-obj-y += reset.o @@ -24,8 +31,10 @@ stub-obj-y += set-fd-handler.o stub-obj-y += slirp.o stub-obj-y += sysbus.o stub-obj-y += uuid.o +stub-obj-y += vc-init.o stub-obj-y += vm-stop.o stub-obj-y += vmstate.o stub-obj-$(CONFIG_WIN32) += fd-register.o stub-obj-y += cpus.o stub-obj-y += kvm.o +stub-obj-y += qmp_pc_dimm_device_list.o diff --git a/stubs/bdrv-commit-all.c b/stubs/bdrv-commit-all.c new file mode 100644 index 0000000000..a8e0a95417 --- /dev/null +++ b/stubs/bdrv-commit-all.c @@ -0,0 +1,7 @@ +#include "qemu-common.h" +#include "block/block.h" + +int bdrv_commit_all(void) +{ + return 0; +} diff --git a/stubs/chr-msmouse.c b/stubs/chr-msmouse.c new file mode 100644 index 0000000000..812f8b0abe --- /dev/null +++ b/stubs/chr-msmouse.c @@ -0,0 +1,7 @@ +#include "qemu-common.h" +#include "sysemu/char.h" + +CharDriverState *qemu_chr_open_msmouse(void) +{ + return 0; +} diff --git a/stubs/get-next-serial.c b/stubs/get-next-serial.c new file mode 100644 index 0000000000..40c56d13d7 --- /dev/null +++ b/stubs/get-next-serial.c @@ -0,0 +1,3 @@ +#include "qemu-common.h" + +CharDriverState *serial_hds[0]; diff --git a/stubs/is-daemonized.c b/stubs/is-daemonized.c new file mode 100644 index 0000000000..c0ee9171a7 --- /dev/null +++ b/stubs/is-daemonized.c @@ -0,0 +1,9 @@ +#include "qemu-common.h" + +/* Win32 has its own inline stub */ +#ifndef _WIN32 +bool is_daemonized(void) +{ + return false; +} +#endif diff --git a/stubs/machine-init-done.c b/stubs/machine-init-done.c new file mode 100644 index 0000000000..28a92555b6 --- /dev/null +++ b/stubs/machine-init-done.c @@ -0,0 +1,6 @@ +#include "qemu-common.h" +#include "sysemu/sysemu.h" + +void qemu_add_machine_init_done_notifier(Notifier *notify) +{ +} diff --git a/stubs/monitor-init.c b/stubs/monitor-init.c new file mode 100644 index 0000000000..563902b412 --- /dev/null +++ b/stubs/monitor-init.c @@ -0,0 +1,6 @@ +#include "qemu-common.h" +#include "monitor/monitor.h" + +void monitor_init(CharDriverState *chr, int flags) +{ +} diff --git a/stubs/notify-event.c b/stubs/notify-event.c new file mode 100644 index 0000000000..32f7289d3a --- /dev/null +++ b/stubs/notify-event.c @@ -0,0 +1,6 @@ +#include "qemu-common.h" +#include "qemu/main-loop.h" + +void qemu_notify_event(void) +{ +} diff --git a/stubs/qmp_pc_dimm_device_list.c b/stubs/qmp_pc_dimm_device_list.c new file mode 100644 index 0000000000..5cb220c66c --- /dev/null +++ b/stubs/qmp_pc_dimm_device_list.c @@ -0,0 +1,7 @@ +#include "qom/object.h" +#include "hw/mem/pc-dimm.h" + +int qmp_pc_dimm_device_list(Object *obj, void *opaque) +{ + return 0; +} diff --git a/stubs/vc-init.c b/stubs/vc-init.c new file mode 100644 index 0000000000..2af054fe6b --- /dev/null +++ b/stubs/vc-init.c @@ -0,0 +1,7 @@ +#include "qemu-common.h" +#include "ui/console.h" + +CharDriverState *vc_init(ChardevVC *vc) +{ + return 0; +} diff --git a/tests/Makefile b/tests/Makefile index 361bb7b6e3..4caf7deb89 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -156,6 +156,7 @@ gcov-files-i386-y += hw/usb/hcd-ehci.c gcov-files-i386-y += hw/usb/hcd-uhci.c gcov-files-i386-y += hw/usb/dev-hid.c gcov-files-i386-y += hw/usb/dev-storage.c +#check-qtest-i386-y += tests/vhost-user-test$(EXESUF) check-qtest-x86_64-y = $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) @@ -322,9 +323,12 @@ tests/es1370-test$(EXESUF): tests/es1370-test.o tests/intel-hda-test$(EXESUF): tests/intel-hda-test.o tests/ioh3420-test$(EXESUF): tests/ioh3420-test.o tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-pc-obj-y) +tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o libqemuutil.a libqemustub.a tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o libqemuutil.a libqemustub.a +#LIBS+= -lutil + # QTest rules TARGETS=$(patsubst %-softmmu,%, $(filter %-softmmu,$(TARGET_DIRS))) diff --git a/tests/acpi-test-data/pc/DSDT b/tests/acpi-test-data/pc/DSDT Binary files differindex d0bb3de79d..7ed03fd37e 100644 --- a/tests/acpi-test-data/pc/DSDT +++ b/tests/acpi-test-data/pc/DSDT diff --git a/tests/acpi-test-data/pc/SSDT b/tests/acpi-test-data/pc/SSDT Binary files differindex c987fb2379..eb2d8b698c 100644 --- a/tests/acpi-test-data/pc/SSDT +++ b/tests/acpi-test-data/pc/SSDT diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT Binary files differindex fc5b970009..2d2bc4adaf 100644 --- a/tests/acpi-test-data/q35/DSDT +++ b/tests/acpi-test-data/q35/DSDT diff --git a/tests/acpi-test-data/q35/SSDT b/tests/acpi-test-data/q35/SSDT Binary files differindex 9199638757..778b79bf42 100644 --- a/tests/acpi-test-data/q35/SSDT +++ b/tests/acpi-test-data/q35/SSDT diff --git a/tests/test-string-input-visitor.c b/tests/test-string-input-visitor.c index 877e737714..8e3433e0c7 100644 --- a/tests/test-string-input-visitor.c +++ b/tests/test-string-input-visitor.c @@ -64,6 +64,33 @@ static void test_visitor_in_int(TestInputVisitorData *data, g_assert_cmpint(res, ==, value); } +static void test_visitor_in_intList(TestInputVisitorData *data, + const void *unused) +{ + int64_t value[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 20}; + int16List *res = NULL, *tmp; + Visitor *v; + int i = 0; + + v = visitor_input_test_init(data, "1,2,0,2-4,20,5-9,1-8"); + + visit_type_int16List(v, &res, NULL, &error_abort); + tmp = res; + while (i < sizeof(value) / sizeof(value[0])) { + g_assert(tmp); + g_assert_cmpint(tmp->value, ==, value[i++]); + tmp = tmp->next; + } + g_assert(!tmp); + + tmp = res; + while (tmp) { + res = res->next; + g_free(tmp); + tmp = res; + } +} + static void test_visitor_in_bool(TestInputVisitorData *data, const void *unused) { @@ -170,6 +197,7 @@ static void test_visitor_in_fuzz(TestInputVisitorData *data, const void *unused) { int64_t ires; + intList *ilres; bool bres; double nres; char *sres; @@ -193,6 +221,11 @@ static void test_visitor_in_fuzz(TestInputVisitorData *data, v = visitor_input_test_init(data, buf); visit_type_int(v, &ires, NULL, NULL); + visitor_input_teardown(data, NULL); + + v = visitor_input_test_init(data, buf); + visit_type_intList(v, &ilres, NULL, NULL); + visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, buf); visit_type_bool(v, &bres, NULL, NULL); @@ -200,11 +233,13 @@ static void test_visitor_in_fuzz(TestInputVisitorData *data, v = visitor_input_test_init(data, buf); visit_type_number(v, &nres, NULL, NULL); + visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, buf); sres = NULL; visit_type_str(v, &sres, NULL, NULL); g_free(sres); + visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, buf); visit_type_EnumOne(v, &eres, NULL, NULL); @@ -228,6 +263,8 @@ int main(int argc, char **argv) input_visitor_test_add("/string-visitor/input/int", &in_visitor_data, test_visitor_in_int); + input_visitor_test_add("/string-visitor/input/intList", + &in_visitor_data, test_visitor_in_intList); input_visitor_test_add("/string-visitor/input/bool", &in_visitor_data, test_visitor_in_bool); input_visitor_test_add("/string-visitor/input/number", diff --git a/tests/test-string-output-visitor.c b/tests/test-string-output-visitor.c index 2af5a21ab5..28e7359a2a 100644 --- a/tests/test-string-output-visitor.c +++ b/tests/test-string-output-visitor.c @@ -44,7 +44,7 @@ static void visitor_output_teardown(TestOutputVisitorData *data, static void test_visitor_out_int(TestOutputVisitorData *data, const void *unused) { - int64_t value = -42; + int64_t value = 42; Error *err = NULL; char *str; @@ -53,10 +53,42 @@ static void test_visitor_out_int(TestOutputVisitorData *data, str = string_output_get_string(data->sov); g_assert(str != NULL); - g_assert_cmpstr(str, ==, "-42"); + g_assert_cmpstr(str, ==, "42"); g_free(str); } +static void test_visitor_out_intList(TestOutputVisitorData *data, + const void *unused) +{ + int64_t value[] = {0, 1, 9, 10, 16, 15, 14, + 3, 4, 5, 6, 11, 12, 13, 21, 22, INT64_MAX - 1, INT64_MAX}; + intList *list = NULL, **tmp = &list; + int i; + Error *errp = NULL; + char *str; + + for (i = 0; i < sizeof(value) / sizeof(value[0]); i++) { + *tmp = g_malloc0(sizeof(**tmp)); + (*tmp)->value = value[i]; + tmp = &(*tmp)->next; + } + + visit_type_intList(data->ov, &list, NULL, &errp); + g_assert(errp == NULL); + + str = string_output_get_string(data->sov); + g_assert(str != NULL); + g_assert_cmpstr(str, ==, + "0-1,3-6,9-16,21-22,9223372036854775806-9223372036854775807"); + g_free(str); + while (list) { + intList *tmp2; + tmp2 = list->next; + g_free(list); + list = tmp2; + } +} + static void test_visitor_out_bool(TestOutputVisitorData *data, const void *unused) { @@ -182,6 +214,8 @@ int main(int argc, char **argv) &out_visitor_data, test_visitor_out_enum); output_visitor_test_add("/string-visitor/output/enum-errors", &out_visitor_data, test_visitor_out_enum_errors); + output_visitor_test_add("/string-visitor/output/intList", + &out_visitor_data, test_visitor_out_intList); g_test_run(); diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c new file mode 100644 index 0000000000..7c826b49e5 --- /dev/null +++ b/tests/vhost-user-test.c @@ -0,0 +1,312 @@ +/* + * QTest testcase for the vhost-user + * + * Copyright (c) 2014 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "libqtest.h" +#include "qemu/option.h" +#include "sysemu/char.h" +#include "sysemu/sysemu.h" + +#include <glib.h> +#include <linux/vhost.h> +#include <sys/mman.h> +#include <sys/vfs.h> +#include <qemu/sockets.h> + +#define QEMU_CMD_ACCEL " -machine accel=tcg" +#define QEMU_CMD_MEM " -m 512 -object memory-backend-file,id=mem,size=512M,"\ + "mem-path=%s,share=on -numa node,memdev=mem" +#define QEMU_CMD_CHR " -chardev socket,id=chr0,path=%s" +#define QEMU_CMD_NETDEV " -netdev vhost-user,id=net0,chardev=chr0,vhostforce" +#define QEMU_CMD_NET " -device virtio-net-pci,netdev=net0 " +#define QEMU_CMD_ROM " -option-rom ../pc-bios/pxe-virtio.rom" + +#define QEMU_CMD QEMU_CMD_ACCEL QEMU_CMD_MEM QEMU_CMD_CHR \ + QEMU_CMD_NETDEV QEMU_CMD_NET QEMU_CMD_ROM + +#define HUGETLBFS_MAGIC 0x958458f6 + +/*********** FROM hw/virtio/vhost-user.c *************************************/ + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_MAX +} VhostUserRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserMsg { + VhostUserRequest request; + +#define VHOST_USER_VERSION_MASK (0x3) +#define VHOST_USER_REPLY_MASK (0x1<<2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + }; +} QEMU_PACKED VhostUserMsg; + +static VhostUserMsg m __attribute__ ((unused)); +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \ + + sizeof(m.flags) \ + + sizeof(m.size)) + +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION (0x1) +/*****************************************************************************/ + +int fds_num = 0, fds[VHOST_MEMORY_MAX_NREGIONS]; +static VhostUserMemory memory; +static GMutex data_mutex; +static GCond data_cond; + +static void read_guest_mem(void) +{ + uint32_t *guest_mem; + gint64 end_time; + int i, j; + + g_mutex_lock(&data_mutex); + + end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; + while (!fds_num) { + if (!g_cond_wait_until(&data_cond, &data_mutex, end_time)) { + /* timeout has passed */ + g_assert(fds_num); + break; + } + } + + /* check for sanity */ + g_assert_cmpint(fds_num, >, 0); + g_assert_cmpint(fds_num, ==, memory.nregions); + + /* iterate all regions */ + for (i = 0; i < fds_num; i++) { + + /* We'll check only the region statring at 0x0*/ + if (memory.regions[i].guest_phys_addr != 0x0) { + continue; + } + + g_assert_cmpint(memory.regions[i].memory_size, >, 1024); + + guest_mem = mmap(0, memory.regions[i].memory_size, + PROT_READ | PROT_WRITE, MAP_SHARED, fds[i], 0); + + for (j = 0; j < 256; j++) { + uint32_t a = readl(memory.regions[i].guest_phys_addr + j*4); + uint32_t b = guest_mem[j]; + + g_assert_cmpint(a, ==, b); + } + + munmap(guest_mem, memory.regions[i].memory_size); + } + + g_assert_cmpint(1, ==, 1); + g_mutex_unlock(&data_mutex); +} + +static void *thread_function(void *data) +{ + GMainLoop *loop; + loop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(loop); + return NULL; +} + +static int chr_can_read(void *opaque) +{ + return VHOST_USER_HDR_SIZE; +} + +static void chr_read(void *opaque, const uint8_t *buf, int size) +{ + CharDriverState *chr = opaque; + VhostUserMsg msg; + uint8_t *p = (uint8_t *) &msg; + int fd; + + if (size != VHOST_USER_HDR_SIZE) { + g_test_message("Wrong message size received %d\n", size); + return; + } + + memcpy(p, buf, VHOST_USER_HDR_SIZE); + + if (msg.size) { + p += VHOST_USER_HDR_SIZE; + qemu_chr_fe_read_all(chr, p, msg.size); + } + + switch (msg.request) { + case VHOST_USER_GET_FEATURES: + /* send back features to qemu */ + msg.flags |= VHOST_USER_REPLY_MASK; + msg.size = sizeof(m.u64); + msg.u64 = 0; + p = (uint8_t *) &msg; + qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); + break; + + case VHOST_USER_GET_VRING_BASE: + /* send back vring base to qemu */ + msg.flags |= VHOST_USER_REPLY_MASK; + msg.size = sizeof(m.state); + msg.state.num = 0; + p = (uint8_t *) &msg; + qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); + break; + + case VHOST_USER_SET_MEM_TABLE: + /* received the mem table */ + memcpy(&memory, &msg.memory, sizeof(msg.memory)); + fds_num = qemu_chr_fe_get_msgfds(chr, fds, sizeof(fds) / sizeof(int)); + + /* signal the test that it can continue */ + g_cond_signal(&data_cond); + g_mutex_unlock(&data_mutex); + break; + + case VHOST_USER_SET_VRING_KICK: + case VHOST_USER_SET_VRING_CALL: + /* consume the fd */ + qemu_chr_fe_get_msgfds(chr, &fd, 1); + /* + * This is a non-blocking eventfd. + * The receive function forces it to be blocking, + * so revert it back to non-blocking. + */ + qemu_set_nonblock(fd); + break; + default: + break; + } +} + +static const char *init_hugepagefs(void) +{ + const char *path; + struct statfs fs; + int ret; + + path = getenv("QTEST_HUGETLBFS_PATH"); + if (!path) { + path = "/hugetlbfs"; + } + + if (access(path, R_OK | W_OK | X_OK)) { + g_test_message("access on path (%s): %s\n", path, strerror(errno)); + return NULL; + } + + do { + ret = statfs(path, &fs); + } while (ret != 0 && errno == EINTR); + + if (ret != 0) { + g_test_message("statfs on path (%s): %s\n", path, strerror(errno)); + return NULL; + } + + if (fs.f_type != HUGETLBFS_MAGIC) { + g_test_message("Warning: path not on HugeTLBFS: %s\n", path); + return NULL; + } + + return path; +} + +int main(int argc, char **argv) +{ + QTestState *s = NULL; + CharDriverState *chr = NULL; + const char *hugefs = 0; + char *socket_path = 0; + char *qemu_cmd = 0; + char *chr_path = 0; + int ret; + + g_test_init(&argc, &argv, NULL); + + module_call_init(MODULE_INIT_QOM); + + hugefs = init_hugepagefs(); + if (!hugefs) { + return 0; + } + + socket_path = g_strdup_printf("/tmp/vhost-%d.sock", getpid()); + + /* create char dev and add read handlers */ + qemu_add_opts(&qemu_chardev_opts); + chr_path = g_strdup_printf("unix:%s,server,nowait", socket_path); + chr = qemu_chr_new("chr0", chr_path, NULL); + g_free(chr_path); + qemu_chr_add_handlers(chr, chr_can_read, chr_read, NULL, chr); + + /* run the main loop thread so the chardev may operate */ + g_mutex_init(&data_mutex); + g_cond_init(&data_cond); + g_mutex_lock(&data_mutex); + g_thread_new(NULL, thread_function, NULL); + + qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path); + s = qtest_start(qemu_cmd); + g_free(qemu_cmd); + + qtest_add_func("/vhost-user/read-guest-mem", read_guest_mem); + + ret = g_test_run(); + + if (s) { + qtest_quit(s); + } + + /* cleanup */ + unlink(socket_path); + g_free(socket_path); + g_cond_clear(&data_cond); + g_mutex_clear(&data_mutex); + + return ret; +} diff --git a/trace-events b/trace-events index f8dff485b2..ba01ad52cf 100644 --- a/trace-events +++ b/trace-events @@ -1272,6 +1272,23 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x" pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x" +#hw/acpi/memory_hotplug.c +mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32 +mhp_acpi_read_addr_lo(uint32_t slot, uint32_t addr) "slot[0x%"PRIx32"] addr lo: 0x%"PRIx32 +mhp_acpi_read_addr_hi(uint32_t slot, uint32_t addr) "slot[0x%"PRIx32"] addr hi: 0x%"PRIx32 +mhp_acpi_read_size_lo(uint32_t slot, uint32_t size) "slot[0x%"PRIx32"] size lo: 0x%"PRIx32 +mhp_acpi_read_size_hi(uint32_t slot, uint32_t size) "slot[0x%"PRIx32"] size hi: 0x%"PRIx32 +mhp_acpi_read_pxm(uint32_t slot, uint32_t pxm) "slot[0x%"PRIx32"] proximity: 0x%"PRIx32 +mhp_acpi_read_flags(uint32_t slot, uint32_t flags) "slot[0x%"PRIx32"] flags: 0x%"PRIx32 +mhp_acpi_write_slot(uint32_t slot) "set active slot: 0x%"PRIx32 +mhp_acpi_write_ost_ev(uint32_t slot, uint32_t ev) "slot[0x%"PRIx32"] OST EVENT: 0x%"PRIx32 +mhp_acpi_write_ost_status(uint32_t slot, uint32_t st) "slot[0x%"PRIx32"] OST STATUS: 0x%"PRIx32 +mhp_acpi_clear_insert_evt(uint32_t slot) "slot[0x%"PRIx32"] clear insert event" + +#hw/i386/pc.c +mhp_pc_dimm_assigned_slot(int slot) "0x%d" +mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64 + # target-s390x/kvm.c kvm_enable_cmma(int rc) "CMMA: enabling with result code %d" kvm_clear_cmma(int rc) "CMMA: clearing with result code %d" diff --git a/translate-all.c b/translate-all.c index 6b7b46e761..5425d038d9 100644 --- a/translate-all.c +++ b/translate-all.c @@ -295,14 +295,7 @@ void page_size_init(void) { /* NOTE: we can always suppose that qemu_host_page_size >= TARGET_PAGE_SIZE */ -#ifdef _WIN32 - SYSTEM_INFO system_info; - - GetSystemInfo(&system_info); - qemu_real_host_page_size = system_info.dwPageSize; -#else qemu_real_host_page_size = getpagesize(); -#endif if (qemu_host_page_size == 0) { qemu_host_page_size = qemu_real_host_page_size; } diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 8e9c770d28..1524ead755 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -46,6 +46,7 @@ extern int daemon(int, int); #else # define QEMU_VMALLOC_ALIGN getpagesize() #endif +#define HUGETLBFS_MAGIC 0x958458f6 #include <termios.h> #include <unistd.h> @@ -58,9 +59,12 @@ extern int daemon(int, int); #include "qemu/sockets.h" #include <sys/mman.h> #include <libgen.h> +#include <setjmp.h> +#include <sys/signal.h> #ifdef CONFIG_LINUX #include <sys/syscall.h> +#include <sys/vfs.h> #endif #ifdef __FreeBSD__ @@ -332,3 +336,72 @@ char *qemu_get_exec_dir(void) { return g_strdup(exec_dir); } + +static sigjmp_buf sigjump; + +static void sigbus_handler(int signal) +{ + siglongjmp(sigjump, 1); +} + +static size_t fd_getpagesize(int fd) +{ +#ifdef CONFIG_LINUX + struct statfs fs; + int ret; + + if (fd != -1) { + do { + ret = fstatfs(fd, &fs); + } while (ret != 0 && errno == EINTR); + + if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) { + return fs.f_bsize; + } + } +#endif + + return getpagesize(); +} + +void os_mem_prealloc(int fd, char *area, size_t memory) +{ + int ret, i; + struct sigaction act, oldact; + sigset_t set, oldset; + size_t hpagesize = fd_getpagesize(fd); + + memset(&act, 0, sizeof(act)); + act.sa_handler = &sigbus_handler; + act.sa_flags = 0; + + ret = sigaction(SIGBUS, &act, &oldact); + if (ret) { + perror("os_mem_prealloc: failed to install signal handler"); + exit(1); + } + + /* unblock SIGBUS */ + sigemptyset(&set); + sigaddset(&set, SIGBUS); + pthread_sigmask(SIG_UNBLOCK, &set, &oldset); + + if (sigsetjmp(sigjump, 1)) { + fprintf(stderr, "os_mem_prealloc: failed to preallocate pages\n"); + exit(1); + } + + /* MAP_POPULATE silently ignores failures */ + memory = (memory + hpagesize - 1) & -hpagesize; + for (i = 0; i < (memory/hpagesize); i++) { + memset(area + (hpagesize*i), 0, 1); + } + + ret = sigaction(SIGBUS, &oldact, NULL); + if (ret) { + perror("os_mem_prealloc: failed to reinstall signal handler"); + exit(1); + } + + pthread_sigmask(SIG_SETMASK, &oldset, NULL); +} diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 157f10fab2..507cedd84d 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -446,3 +446,22 @@ gint g_poll(GPollFD *fds, guint nfds, gint timeout) return retval; } + +size_t getpagesize(void) +{ + SYSTEM_INFO system_info; + + GetSystemInfo(&system_info); + return system_info.dwPageSize; +} + +void os_mem_prealloc(int fd, char *area, size_t memory) +{ + int i; + size_t pagesize = getpagesize(); + + memory = (memory + pagesize - 1) & -pagesize; + for (i = 0; i < memory / pagesize; i++) { + memset(area + pagesize * i, 0, 1); + } +} @@ -116,7 +116,7 @@ int main(int argc, char **argv) #include "ui/qemu-spice.h" #include "qapi/string-input-visitor.h" -#include "qom/object_interfaces.h" +#include "qapi/opts-visitor.h" #define DEFAULT_RAM_SIZE 128 @@ -195,8 +195,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order); int nb_numa_nodes; -uint64_t node_mem[MAX_NODES]; -unsigned long *node_cpumask[MAX_NODES]; +NodeInfo numa_info[MAX_NODES]; uint8_t qemu_uuid[16]; bool qemu_uuid_set; @@ -520,6 +519,14 @@ static QemuOptsList qemu_mem_opts = { .name = "size", .type = QEMU_OPT_SIZE, }, + { + .name = "slots", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "maxmem", + .type = QEMU_OPT_SIZE, + }, { /* end of list */ } }, }; @@ -1267,102 +1274,6 @@ char *get_boot_devices_list(size_t *size, bool ignore_suffixes) return list; } -static void numa_node_parse_cpus(int nodenr, const char *cpus) -{ - char *endptr; - unsigned long long value, endvalue; - - /* Empty CPU range strings will be considered valid, they will simply - * not set any bit in the CPU bitmap. - */ - if (!*cpus) { - return; - } - - if (parse_uint(cpus, &value, &endptr, 10) < 0) { - goto error; - } - if (*endptr == '-') { - if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) { - goto error; - } - } else if (*endptr == '\0') { - endvalue = value; - } else { - goto error; - } - - if (endvalue >= MAX_CPUMASK_BITS) { - endvalue = MAX_CPUMASK_BITS - 1; - fprintf(stderr, - "qemu: NUMA: A max of %d VCPUs are supported\n", - MAX_CPUMASK_BITS); - } - - if (endvalue < value) { - goto error; - } - - bitmap_set(node_cpumask[nodenr], value, endvalue-value+1); - return; - -error: - fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus); - exit(1); -} - -static void numa_add(const char *optarg) -{ - char option[128]; - char *endptr; - unsigned long long nodenr; - - optarg = get_opt_name(option, 128, optarg, ','); - if (*optarg == ',') { - optarg++; - } - if (!strcmp(option, "node")) { - - if (nb_numa_nodes >= MAX_NODES) { - fprintf(stderr, "qemu: too many NUMA nodes\n"); - exit(1); - } - - if (get_param_value(option, 128, "nodeid", optarg) == 0) { - nodenr = nb_numa_nodes; - } else { - if (parse_uint_full(option, &nodenr, 10) < 0) { - fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option); - exit(1); - } - } - - if (nodenr >= MAX_NODES) { - fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr); - exit(1); - } - - if (get_param_value(option, 128, "mem", optarg) == 0) { - node_mem[nodenr] = 0; - } else { - int64_t sval; - sval = strtosz(option, &endptr); - if (sval < 0 || *endptr) { - fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg); - exit(1); - } - node_mem[nodenr] = sval; - } - if (get_param_value(option, 128, "cpus", optarg) != 0) { - numa_node_parse_cpus(nodenr, option); - } - nb_numa_nodes++; - } else { - fprintf(stderr, "Invalid -numa option: %s\n", option); - exit(1); - } -} - static QemuOptsList qemu_smp_opts = { .name = "smp-opts", .implied_opt_name = "cpus", @@ -2911,43 +2822,51 @@ static int object_set_property(const char *name, const char *value, void *opaque static int object_create(QemuOpts *opts, void *opaque) { - const char *type = qemu_opt_get(opts, "qom-type"); - const char *id = qemu_opts_id(opts); - Error *local_err = NULL; - Object *obj; + Error *err = NULL; + char *type = NULL; + char *id = NULL; + void *dummy = NULL; + OptsVisitor *ov; + QDict *pdict; - g_assert(type != NULL); + ov = opts_visitor_new(opts); + pdict = qemu_opts_to_qdict(opts, NULL); - if (id == NULL) { - qerror_report(QERR_MISSING_PARAMETER, "id"); - return -1; + visit_start_struct(opts_get_visitor(ov), &dummy, NULL, NULL, 0, &err); + if (err) { + goto out; } - obj = object_new(type); - if (qemu_opt_foreach(opts, object_set_property, obj, 1) < 0) { - object_unref(obj); - return -1; + qdict_del(pdict, "qom-type"); + visit_type_str(opts_get_visitor(ov), &type, "qom-type", &err); + if (err) { + goto out; } - if (!object_dynamic_cast(obj, TYPE_USER_CREATABLE)) { - error_setg(&local_err, "object '%s' isn't supported by -object", - id); + qdict_del(pdict, "id"); + visit_type_str(opts_get_visitor(ov), &id, "id", &err); + if (err) { goto out; } - user_creatable_complete(obj, &local_err); - if (local_err) { + object_add(type, id, pdict, opts_get_visitor(ov), &err); + if (err) { goto out; } - - object_property_add_child(container_get(object_get_root(), "/objects"), - id, obj, &local_err); + visit_end_struct(opts_get_visitor(ov), &err); + if (err) { + qmp_object_del(id, NULL); + } out: - object_unref(obj); - if (local_err) { - qerror_report_err(local_err); - error_free(local_err); + opts_visitor_cleanup(ov); + + QDECREF(pdict); + g_free(id); + g_free(type); + g_free(dummy); + if (err) { + qerror_report_err(err); return -1; } return 0; @@ -2991,6 +2910,8 @@ int main(int argc, char **argv, char **envp) const char *trace_file = NULL; const ram_addr_t default_ram_size = (ram_addr_t)DEFAULT_RAM_SIZE * 1024 * 1024; + ram_addr_t maxram_size = default_ram_size; + uint64_t ram_slots = 0; atexit(qemu_run_exit_notifiers); error_set_progname(argv[0]); @@ -3024,6 +2945,7 @@ int main(int argc, char **argv, char **envp) qemu_add_opts(&qemu_realtime_opts); qemu_add_opts(&qemu_msg_opts); qemu_add_opts(&qemu_name_opts); + qemu_add_opts(&qemu_numa_opts); runstate_init(); @@ -3044,8 +2966,8 @@ int main(int argc, char **argv, char **envp) translation = BIOS_ATA_TRANSLATION_AUTO; for (i = 0; i < MAX_NODES; i++) { - node_mem[i] = 0; - node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS); + numa_info[i].node_mem = 0; + bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS); } nb_numa_nodes = 0; @@ -3219,7 +3141,10 @@ int main(int argc, char **argv, char **envp) } break; case QEMU_OPTION_numa: - numa_add(optarg); + opts = qemu_opts_parse(qemu_find_opts("numa"), optarg, 1); + if (!opts) { + exit(1); + } break; case QEMU_OPTION_display: display_type = select_display(optarg); @@ -3326,6 +3251,7 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_m: { uint64_t sz; const char *mem_str; + const char *maxmem_str, *slots_str; opts = qemu_opts_parse(qemu_find_opts("memory"), optarg, 1); @@ -3367,6 +3293,44 @@ int main(int argc, char **argv, char **envp) error_report("ram size too large"); exit(EXIT_FAILURE); } + + maxmem_str = qemu_opt_get(opts, "maxmem"); + slots_str = qemu_opt_get(opts, "slots"); + if (maxmem_str && slots_str) { + uint64_t slots; + + sz = qemu_opt_get_size(opts, "maxmem", 0); + if (sz < ram_size) { + fprintf(stderr, "qemu: invalid -m option value: maxmem " + "(%" PRIu64 ") <= initial memory (" + RAM_ADDR_FMT ")\n", sz, ram_size); + exit(EXIT_FAILURE); + } + + slots = qemu_opt_get_number(opts, "slots", 0); + if ((sz > ram_size) && !slots) { + fprintf(stderr, "qemu: invalid -m option value: maxmem " + "(%" PRIu64 ") more than initial memory (" + RAM_ADDR_FMT ") but no hotplug slots where " + "specified\n", sz, ram_size); + exit(EXIT_FAILURE); + } + + if ((sz <= ram_size) && slots) { + fprintf(stderr, "qemu: invalid -m option value: %" + PRIu64 " hotplug slots where specified but " + "maxmem (%" PRIu64 ") <= initial memory (" + RAM_ADDR_FMT ")\n", slots, sz, ram_size); + exit(EXIT_FAILURE); + } + maxram_size = sz; + ram_slots = slots; + } else if ((!maxmem_str && slots_str) || + (maxmem_str && !slots_str)) { + fprintf(stderr, "qemu: invalid -m option value: missing " + "'%s' option\n", slots_str ? "maxmem" : "slots"); + exit(EXIT_FAILURE); + } break; } #ifdef CONFIG_TPM @@ -3964,6 +3928,8 @@ int main(int argc, char **argv, char **envp) } loc_set_none(); + os_daemonize(); + if (qemu_init_main_loop()) { fprintf(stderr, "qemu_init_main_loop failed\n"); exit(1); @@ -3993,6 +3959,8 @@ int main(int argc, char **argv, char **envp) exit(1); } + cpu_exec_init_all(); + current_machine = MACHINE(object_new(object_class_get_name( OBJECT_CLASS(machine_class)))); object_property_add_child(object_get_root(), "machine", @@ -4205,8 +4173,6 @@ int main(int argc, char **argv, char **envp) } #endif - os_daemonize(); - if (pid_file && qemu_create_pidfile(pid_file) != 0) { os_pidfile_error(); exit(1); @@ -4332,8 +4298,6 @@ int main(int argc, char **argv, char **envp) } } - cpu_exec_init_all(); - blk_mig_init(); ram_mig_init(); @@ -4350,49 +4314,13 @@ int main(int argc, char **argv, char **envp) default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); - if (nb_numa_nodes > 0) { - int i; - - if (nb_numa_nodes > MAX_NODES) { - nb_numa_nodes = MAX_NODES; - } - - /* If no memory size if given for any node, assume the default case - * and distribute the available memory equally across all nodes - */ - for (i = 0; i < nb_numa_nodes; i++) { - if (node_mem[i] != 0) - break; - } - if (i == nb_numa_nodes) { - uint64_t usedmem = 0; - - /* On Linux, the each node's border has to be 8MB aligned, - * the final node gets the rest. - */ - for (i = 0; i < nb_numa_nodes - 1; i++) { - node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1); - usedmem += node_mem[i]; - } - node_mem[i] = ram_size - usedmem; - } - - for (i = 0; i < nb_numa_nodes; i++) { - if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) { - break; - } - } - /* assigning the VCPUs round-robin is easier to implement, guest OSes - * must cope with this anyway, because there are BIOSes out there in - * real machines which also use this scheme. - */ - if (i == nb_numa_nodes) { - for (i = 0; i < max_cpus; i++) { - set_bit(i, node_cpumask[i % nb_numa_nodes]); - } - } + if (qemu_opts_foreach(qemu_find_opts("numa"), numa_init_func, + NULL, 1) != 0) { + exit(1); } + set_numa_nodes(); + if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) { exit(1); } @@ -4435,6 +4363,8 @@ int main(int argc, char **argv, char **envp) qdev_machine_init(); current_machine->ram_size = ram_size; + current_machine->maxram_size = maxram_size; + current_machine->ram_slots = ram_slots; current_machine->boot_order = boot_order; current_machine->cpu_model = cpu_model; |