diff options
168 files changed, 8935 insertions, 2028 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 0a637c90c6..9b93edda77 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -731,6 +731,16 @@ S: Odd Fixes F: gdbstub* F: gdb-xml/ +Memory API +M: Paolo Bonzini <pbonzini@redhat.com> +S: Supported +F: include/exec/ioport.h +F: ioport.c +F: include/exec/memory.h +F: memory.c +F: include/exec/memory-internal.h +F: exec.c + SPICE M: Gerd Hoffmann <kraxel@redhat.com> S: Supported diff --git a/Makefile.target b/Makefile.target index 06c1e59bc4..fc5827cd72 100644 --- a/Makefile.target +++ b/Makefile.target @@ -119,7 +119,7 @@ endif #CONFIG_BSD_USER ######################################################### # System emulator target ifdef CONFIG_SOFTMMU -obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o +obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o obj-y += qtest.o obj-y += hw/ obj-$(CONFIG_FDT) += device_tree.o diff --git a/backends/Makefile.objs b/backends/Makefile.objs index 591ddcf6f3..506a46c33b 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs @@ -6,3 +6,6 @@ common-obj-$(CONFIG_BRLAPI) += baum.o baum.o-cflags := $(SDL_CFLAGS) common-obj-$(CONFIG_TPM) += tpm.o + +common-obj-y += hostmem.o hostmem-ram.o +common-obj-$(CONFIG_LINUX) += hostmem-file.o diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c new file mode 100644 index 0000000000..51799943f1 --- /dev/null +++ b/backends/hostmem-file.c @@ -0,0 +1,134 @@ +/* + * QEMU Host Memory Backend for hugetlbfs + * + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#include "qemu-common.h" +#include "sysemu/hostmem.h" +#include "sysemu/sysemu.h" +#include "qom/object_interfaces.h" + +/* hostmem-file.c */ +/** + * @TYPE_MEMORY_BACKEND_FILE: + * name of backend that uses mmap on a file descriptor + */ +#define TYPE_MEMORY_BACKEND_FILE "memory-backend-file" + +#define MEMORY_BACKEND_FILE(obj) \ + OBJECT_CHECK(HostMemoryBackendFile, (obj), TYPE_MEMORY_BACKEND_FILE) + +typedef struct HostMemoryBackendFile HostMemoryBackendFile; + +struct HostMemoryBackendFile { + HostMemoryBackend parent_obj; + + bool share; + char *mem_path; +}; + +static void +file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) +{ + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend); + + if (!backend->size) { + error_setg(errp, "can't create backend with size 0"); + return; + } + if (!fb->mem_path) { + error_setg(errp, "mem_path property not set"); + return; + } +#ifndef CONFIG_LINUX + error_setg(errp, "-mem-path not supported on this host"); +#else + if (!memory_region_size(&backend->mr)) { + backend->force_prealloc = mem_prealloc; + memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), + object_get_canonical_path(OBJECT(backend)), + backend->size, fb->share, + fb->mem_path, errp); + } +#endif +} + +static void +file_backend_class_init(ObjectClass *oc, void *data) +{ + HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc); + + bc->alloc = file_backend_memory_alloc; +} + +static char *get_mem_path(Object *o, Error **errp) +{ + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + + return g_strdup(fb->mem_path); +} + +static void set_mem_path(Object *o, const char *str, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(o); + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + + if (memory_region_size(&backend->mr)) { + error_setg(errp, "cannot change property value"); + return; + } + if (fb->mem_path) { + g_free(fb->mem_path); + } + fb->mem_path = g_strdup(str); +} + +static bool 
file_memory_backend_get_share(Object *o, Error **errp) +{ + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + + return fb->share; +} + +static void file_memory_backend_set_share(Object *o, bool value, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(o); + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + + if (memory_region_size(&backend->mr)) { + error_setg(errp, "cannot change property value"); + return; + } + fb->share = value; +} + +static void +file_backend_instance_init(Object *o) +{ + object_property_add_bool(o, "share", + file_memory_backend_get_share, + file_memory_backend_set_share, NULL); + object_property_add_str(o, "mem-path", get_mem_path, + set_mem_path, NULL); +} + +static const TypeInfo file_backend_info = { + .name = TYPE_MEMORY_BACKEND_FILE, + .parent = TYPE_MEMORY_BACKEND, + .class_init = file_backend_class_init, + .instance_init = file_backend_instance_init, + .instance_size = sizeof(HostMemoryBackendFile), +}; + +static void register_types(void) +{ + type_register_static(&file_backend_info); +} + +type_init(register_types); diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c new file mode 100644 index 0000000000..d9a8290dc9 --- /dev/null +++ b/backends/hostmem-ram.c @@ -0,0 +1,53 @@ +/* + * QEMU Host Memory Backend + * + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#include "sysemu/hostmem.h" +#include "qom/object_interfaces.h" + +#define TYPE_MEMORY_BACKEND_RAM "memory-backend-ram" + + +static void +ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) +{ + char *path; + + if (!backend->size) { + error_setg(errp, "can't create backend with size 0"); + return; + } + + path = object_get_canonical_path_component(OBJECT(backend)); + memory_region_init_ram(&backend->mr, OBJECT(backend), path, + backend->size); + g_free(path); +} + +static void +ram_backend_class_init(ObjectClass *oc, void *data) +{ + HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc); + + bc->alloc = ram_backend_memory_alloc; +} + +static const TypeInfo ram_backend_info = { + .name = TYPE_MEMORY_BACKEND_RAM, + .parent = TYPE_MEMORY_BACKEND, + .class_init = ram_backend_class_init, +}; + +static void register_types(void) +{ + type_register_static(&ram_backend_info); +} + +type_init(register_types); diff --git a/backends/hostmem.c b/backends/hostmem.c new file mode 100644 index 0000000000..ca10c51b51 --- /dev/null +++ b/backends/hostmem.c @@ -0,0 +1,375 @@ +/* + * QEMU Host Memory Backend + * + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#include "sysemu/hostmem.h" +#include "qapi/visitor.h" +#include "qapi-types.h" +#include "qapi-visit.h" +#include "qapi/qmp/qerror.h" +#include "qemu/config-file.h" +#include "qom/object_interfaces.h" + +#ifdef CONFIG_NUMA +#include <numaif.h> +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); +#endif + +static void +host_memory_backend_get_size(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + uint64_t value = backend->size; + + visit_type_size(v, &value, name, errp); +} + +static void +host_memory_backend_set_size(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + Error *local_err = NULL; + uint64_t value; + + if (memory_region_size(&backend->mr)) { + error_setg(&local_err, "cannot change property value"); + goto out; + } + + visit_type_size(v, &value, name, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' doesn't take value '%" + PRIu64 "'", object_get_typename(obj), name, value); + goto out; + } + backend->size = value; +out: + error_propagate(errp, local_err); +} + +static void +host_memory_backend_get_host_nodes(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + uint16List *host_nodes = NULL; + uint16List **node = &host_nodes; + unsigned long value; + + value = find_first_bit(backend->host_nodes, MAX_NODES); + if (value == MAX_NODES) { + return; + } + + *node = g_malloc0(sizeof(**node)); + (*node)->value = value; + node = &(*node)->next; + + do { + value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); + if (value == MAX_NODES) { + break; + } + + *node = 
g_malloc0(sizeof(**node)); + (*node)->value = value; + node = &(*node)->next; + } while (true); + + visit_type_uint16List(v, &host_nodes, name, errp); +} + +static void +host_memory_backend_set_host_nodes(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ +#ifdef CONFIG_NUMA + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + uint16List *l = NULL; + + visit_type_uint16List(v, &l, name, errp); + + while (l) { + bitmap_set(backend->host_nodes, l->value, 1); + l = l->next; + } +#else + error_setg(errp, "NUMA node binding are not supported by this QEMU"); +#endif +} + +static void +host_memory_backend_get_policy(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + int policy = backend->policy; + + visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); +} + +static void +host_memory_backend_set_policy(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + int policy; + + visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); + backend->policy = policy; + +#ifndef CONFIG_NUMA + if (policy != HOST_MEM_POLICY_DEFAULT) { + error_setg(errp, "NUMA policies are not supported by this QEMU"); + } +#endif +} + +static bool host_memory_backend_get_merge(Object *obj, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + return backend->merge; +} + +static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + if (!memory_region_size(&backend->mr)) { + backend->merge = value; + return; + } + + if (value != backend->merge) { + void *ptr = memory_region_get_ram_ptr(&backend->mr); + uint64_t sz = memory_region_size(&backend->mr); + + qemu_madvise(ptr, sz, + value ? 
QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); + backend->merge = value; + } +} + +static bool host_memory_backend_get_dump(Object *obj, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + return backend->dump; +} + +static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + if (!memory_region_size(&backend->mr)) { + backend->dump = value; + return; + } + + if (value != backend->dump) { + void *ptr = memory_region_get_ram_ptr(&backend->mr); + uint64_t sz = memory_region_size(&backend->mr); + + qemu_madvise(ptr, sz, + value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); + backend->dump = value; + } +} + +static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + return backend->prealloc || backend->force_prealloc; +} + +static void host_memory_backend_set_prealloc(Object *obj, bool value, + Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + if (backend->force_prealloc) { + if (value) { + error_setg(errp, + "remove -mem-prealloc to use the prealloc property"); + return; + } + } + + if (!memory_region_size(&backend->mr)) { + backend->prealloc = value; + return; + } + + if (value && !backend->prealloc) { + int fd = memory_region_get_fd(&backend->mr); + void *ptr = memory_region_get_ram_ptr(&backend->mr); + uint64_t sz = memory_region_size(&backend->mr); + + os_mem_prealloc(fd, ptr, sz); + backend->prealloc = true; + } +} + +static void host_memory_backend_init(Object *obj) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + backend->merge = qemu_opt_get_bool(qemu_get_machine_opts(), + "mem-merge", true); + backend->dump = qemu_opt_get_bool(qemu_get_machine_opts(), + "dump-guest-core", true); + backend->prealloc = mem_prealloc; + + object_property_add_bool(obj, "merge", + host_memory_backend_get_merge, + host_memory_backend_set_merge, NULL); + object_property_add_bool(obj, 
"dump", + host_memory_backend_get_dump, + host_memory_backend_set_dump, NULL); + object_property_add_bool(obj, "prealloc", + host_memory_backend_get_prealloc, + host_memory_backend_set_prealloc, NULL); + object_property_add(obj, "size", "int", + host_memory_backend_get_size, + host_memory_backend_set_size, NULL, NULL, NULL); + object_property_add(obj, "host-nodes", "int", + host_memory_backend_get_host_nodes, + host_memory_backend_set_host_nodes, NULL, NULL, NULL); + object_property_add(obj, "policy", "str", + host_memory_backend_get_policy, + host_memory_backend_set_policy, NULL, NULL, NULL); +} + +static void host_memory_backend_finalize(Object *obj) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + + if (memory_region_size(&backend->mr)) { + memory_region_destroy(&backend->mr); + } +} + +MemoryRegion * +host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp) +{ + return memory_region_size(&backend->mr) ? &backend->mr : NULL; +} + +static void +host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(uc); + HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); + Error *local_err = NULL; + void *ptr; + uint64_t sz; + + if (bc->alloc) { + bc->alloc(backend, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + ptr = memory_region_get_ram_ptr(&backend->mr); + sz = memory_region_size(&backend->mr); + + if (backend->merge) { + qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); + } + if (!backend->dump) { + qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); + } +#ifdef CONFIG_NUMA + unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); + /* lastbit == MAX_NODES means maxnode = 0 */ + unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); + /* ensure policy won't be ignored in case memory is preallocated + * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so + * this doesn't catch hugepage case. 
*/ + unsigned flags = MPOL_MF_STRICT; + + /* check for invalid host-nodes and policies and give more verbose + * error messages than mbind(). */ + if (maxnode && backend->policy == MPOL_DEFAULT) { + error_setg(errp, "host-nodes must be empty for policy default," + " or you should explicitly specify a policy other" + " than default"); + return; + } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { + error_setg(errp, "host-nodes must be set for policy %s", + HostMemPolicy_lookup[backend->policy]); + return; + } + + /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 + * as argument to mbind() due to an old Linux bug (feature?) which + * cuts off the last specified node. This means backend->host_nodes + * must have MAX_NODES+1 bits available. + */ + assert(sizeof(backend->host_nodes) >= + BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); + assert(maxnode <= MAX_NODES); + if (mbind(ptr, sz, backend->policy, + maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { + error_setg_errno(errp, errno, + "cannot bind memory to host NUMA nodes"); + return; + } +#endif + /* Preallocate memory after the NUMA policy has been instantiated. + * This is necessary to guarantee memory is allocated with + * specified NUMA policy in place. 
+ */ + if (backend->prealloc) { + os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz); + } + } +} + +static void +host_memory_backend_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = host_memory_backend_memory_complete; +} + +static const TypeInfo host_memory_backend_info = { + .name = TYPE_MEMORY_BACKEND, + .parent = TYPE_OBJECT, + .abstract = true, + .class_size = sizeof(HostMemoryBackendClass), + .class_init = host_memory_backend_class_init, + .instance_size = sizeof(HostMemoryBackend), + .instance_init = host_memory_backend_init, + .instance_finalize = host_memory_backend_finalize, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void register_types(void) +{ + type_register_static(&host_memory_backend_info); +} + +type_init(register_types); diff --git a/block/iscsi.c b/block/iscsi.c index 6f87605e72..84aa22a62e 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -26,6 +26,7 @@ #include "config-host.h" #include <poll.h> +#include <math.h> #include <arpa/inet.h> #include "qemu-common.h" #include "qemu/config-file.h" @@ -64,6 +65,7 @@ typedef struct IscsiLun { unsigned char *zeroblock; unsigned long *allocationmap; int cluster_sectors; + bool use_16_for_rw; } IscsiLun; typedef struct IscsiTask { @@ -75,6 +77,7 @@ typedef struct IscsiTask { Coroutine *co; QEMUBH *bh; IscsiLun *iscsilun; + QEMUTimer retry_timer; } IscsiTask; typedef struct IscsiAIOCB { @@ -86,7 +89,6 @@ typedef struct IscsiAIOCB { uint8_t *buf; int status; int canceled; - int retries; int64_t sector_num; int nb_sectors; #ifdef __linux__ @@ -96,7 +98,8 @@ typedef struct IscsiAIOCB { #define NOP_INTERVAL 5000 #define MAX_NOP_FAILURES 3 -#define ISCSI_CMD_RETRIES 5 +#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times) +static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048}; /* this threshhold is a trade-off knob to choose between * the potential additional overhead of an 
extra GET_LBA_STATUS request @@ -142,10 +145,25 @@ iscsi_schedule_bh(IscsiAIOCB *acb) static void iscsi_co_generic_bh_cb(void *opaque) { struct IscsiTask *iTask = opaque; + iTask->complete = 1; qemu_bh_delete(iTask->bh); qemu_coroutine_enter(iTask->co, NULL); } +static void iscsi_retry_timer_expired(void *opaque) +{ + struct IscsiTask *iTask = opaque; + iTask->complete = 1; + if (iTask->co) { + qemu_coroutine_enter(iTask->co, NULL); + } +} + +static inline unsigned exp_random(double mean) +{ + return -mean * log((double)rand() / RAND_MAX); +} + static void iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -153,19 +171,34 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, struct IscsiTask *iTask = opaque; struct scsi_task *task = command_data; - iTask->complete = 1; iTask->status = status; iTask->do_retry = 0; iTask->task = task; - if (iTask->retries-- > 0 && status == SCSI_STATUS_CHECK_CONDITION - && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) { - error_report("iSCSI CheckCondition: %s", iscsi_get_error(iscsi)); - iTask->do_retry = 1; - goto out; - } - if (status != SCSI_STATUS_GOOD) { + if (iTask->retries++ < ISCSI_CMD_RETRIES) { + if (status == SCSI_STATUS_CHECK_CONDITION + && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) { + error_report("iSCSI CheckCondition: %s", + iscsi_get_error(iscsi)); + iTask->do_retry = 1; + goto out; + } + if (status == SCSI_STATUS_BUSY) { + unsigned retry_time = + exp_random(iscsi_retry_times[iTask->retries - 1]); + error_report("iSCSI Busy (retry #%u in %u ms): %s", + iTask->retries, retry_time, + iscsi_get_error(iscsi)); + aio_timer_init(iTask->iscsilun->aio_context, + &iTask->retry_timer, QEMU_CLOCK_REALTIME, + SCALE_MS, iscsi_retry_timer_expired, iTask); + timer_mod(&iTask->retry_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time); + iTask->do_retry = 1; + return; + } + } error_report("iSCSI Failure: %s", iscsi_get_error(iscsi)); } @@ -174,15 +207,16 @@ out: 
iTask->bh = aio_bh_new(iTask->iscsilun->aio_context, iscsi_co_generic_bh_cb, iTask); qemu_bh_schedule(iTask->bh); + } else { + iTask->complete = 1; } } static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask) { *iTask = (struct IscsiTask) { - .co = qemu_coroutine_self(), - .retries = ISCSI_CMD_RETRIES, - .iscsilun = iscsilun, + .co = qemu_coroutine_self(), + .iscsilun = iscsilun, }; } @@ -325,8 +359,6 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs, struct IscsiTask iTask; uint64_t lba; uint32_t num_sectors; - uint8_t *data = NULL; - uint8_t *buf = NULL; if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { return -EINVAL; @@ -334,31 +366,24 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs, lba = sector_qemu2lun(sector_num, iscsilun); num_sectors = sector_qemu2lun(nb_sectors, iscsilun); -#if !defined(LIBISCSI_FEATURE_IOVECTOR) - /* if the iovec only contains one buffer we can pass it directly */ - if (iov->niov == 1) { - data = iov->iov[0].iov_base; - } else { - size_t size = MIN(nb_sectors * BDRV_SECTOR_SIZE, iov->size); - buf = g_malloc(size); - qemu_iovec_to_buf(iov, 0, buf, size); - data = buf; - } -#endif iscsi_co_init_iscsitask(iscsilun, &iTask); retry: - iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba, - data, num_sectors * iscsilun->block_size, - iscsilun->block_size, 0, 0, 0, 0, 0, - iscsi_co_generic_cb, &iTask); + if (iscsilun->use_16_for_rw) { + iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba, + NULL, num_sectors * iscsilun->block_size, + iscsilun->block_size, 0, 0, 0, 0, 0, + iscsi_co_generic_cb, &iTask); + } else { + iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba, + NULL, num_sectors * iscsilun->block_size, + iscsilun->block_size, 0, 0, 0, 0, 0, + iscsi_co_generic_cb, &iTask); + } if (iTask.task == NULL) { - g_free(buf); return -ENOMEM; } -#if defined(LIBISCSI_FEATURE_IOVECTOR) scsi_task_set_iov_out(iTask.task, (struct 
scsi_iovec *) iov->iov, iov->niov); -#endif while (!iTask.complete) { iscsi_set_events(iscsilun); qemu_coroutine_yield(); @@ -374,8 +399,6 @@ retry: goto retry; } - g_free(buf); - if (iTask.status != SCSI_STATUS_GOOD) { return -EIO; } @@ -386,7 +409,6 @@ retry: } -#if defined(LIBISCSI_FEATURE_IOVECTOR) static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num, int nb_sectors) { @@ -496,9 +518,6 @@ out: return ret; } -#endif /* LIBISCSI_FEATURE_IOVECTOR */ - - static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov) @@ -507,15 +526,11 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, struct IscsiTask iTask; uint64_t lba; uint32_t num_sectors; -#if !defined(LIBISCSI_FEATURE_IOVECTOR) - int i; -#endif if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { return -EINVAL; } -#if defined(LIBISCSI_FEATURE_IOVECTOR) if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES && !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) { int64_t ret; @@ -529,42 +544,28 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, return 0; } } -#endif lba = sector_qemu2lun(sector_num, iscsilun); num_sectors = sector_qemu2lun(nb_sectors, iscsilun); iscsi_co_init_iscsitask(iscsilun, &iTask); retry: - switch (iscsilun->type) { - case TYPE_DISK: + if (iscsilun->use_16_for_rw) { iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba, num_sectors * iscsilun->block_size, iscsilun->block_size, 0, 0, 0, 0, 0, iscsi_co_generic_cb, &iTask); - break; - default: + } else { iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba, num_sectors * iscsilun->block_size, iscsilun->block_size, -#if !defined(CONFIG_LIBISCSI_1_4) /* API change from 1.4.0 to 1.5.0 */ 0, 0, 0, 0, 0, -#endif iscsi_co_generic_cb, &iTask); - break; } if (iTask.task == NULL) { return -ENOMEM; } -#if defined(LIBISCSI_FEATURE_IOVECTOR) scsi_task_set_iov_in(iTask.task, (struct 
scsi_iovec *) iov->iov, iov->niov); -#else - for (i = 0; i < iov->niov; i++) { - scsi_task_add_data_in_buffer(iTask.task, - iov->iov[i].iov_len, - iov->iov[i].iov_base); - } -#endif while (!iTask.complete) { iscsi_set_events(iscsilun); @@ -719,18 +720,9 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, data.data = acb->ioh->dxferp; data.size = acb->ioh->dxfer_len; } else { -#if defined(LIBISCSI_FEATURE_IOVECTOR) scsi_task_set_iov_out(acb->task, (struct scsi_iovec *) acb->ioh->dxferp, acb->ioh->iovec_count); -#else - struct iovec *iov = (struct iovec *)acb->ioh->dxferp; - - acb->buf = g_malloc(acb->ioh->dxfer_len); - data.data = acb->buf; - data.size = iov_to_buf(iov, acb->ioh->iovec_count, 0, - acb->buf, acb->ioh->dxfer_len); -#endif } } @@ -750,20 +742,9 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, acb->ioh->dxfer_len, acb->ioh->dxferp); } else { -#if defined(LIBISCSI_FEATURE_IOVECTOR) scsi_task_set_iov_in(acb->task, (struct scsi_iovec *) acb->ioh->dxferp, acb->ioh->iovec_count); -#else - int i; - for (i = 0; i < acb->ioh->iovec_count; i++) { - struct iovec *iov = (struct iovec *)acb->ioh->dxferp; - - scsi_task_add_data_in_buffer(acb->task, - iov[i].iov_len, - iov[i].iov_base); - } -#endif } } @@ -772,7 +753,6 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, return &acb->common; } - static void ioctl_cb(void *opaque, int status) { int *p_status = opaque; @@ -877,8 +857,6 @@ retry: return 0; } -#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED) - static int coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) @@ -887,19 +865,27 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, struct IscsiTask iTask; uint64_t lba; uint32_t nb_blocks; + bool use_16_for_ws = iscsilun->use_16_for_rw; if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { return -EINVAL; } - if ((flags & BDRV_REQ_MAY_UNMAP) && 
!iscsilun->lbp.lbpws) { - /* WRITE SAME with UNMAP is not supported by the target, - * fall back and try WRITE SAME without UNMAP */ - flags &= ~BDRV_REQ_MAY_UNMAP; + if (flags & BDRV_REQ_MAY_UNMAP) { + if (!use_16_for_ws && !iscsilun->lbp.lbpws10) { + /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */ + use_16_for_ws = true; + } + if (use_16_for_ws && !iscsilun->lbp.lbpws) { + /* WRITESAME16 with UNMAP is not supported by the target, + * fall back and try WRITESAME10/16 without UNMAP */ + flags &= ~BDRV_REQ_MAY_UNMAP; + use_16_for_ws = iscsilun->use_16_for_rw; + } } if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) { - /* WRITE SAME without UNMAP is not supported by the target */ + /* WRITESAME without UNMAP is not supported by the target */ return -ENOTSUP; } @@ -912,10 +898,18 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, iscsi_co_init_iscsitask(iscsilun, &iTask); retry: - if (iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba, - iscsilun->zeroblock, iscsilun->block_size, - nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), - 0, 0, iscsi_co_generic_cb, &iTask) == NULL) { + if (use_16_for_ws) { + iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba, + iscsilun->zeroblock, iscsilun->block_size, + nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), + 0, 0, iscsi_co_generic_cb, &iTask); + } else { + iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba, + iscsilun->zeroblock, iscsilun->block_size, + nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), + 0, 0, iscsi_co_generic_cb, &iTask); + } + if (iTask.task == NULL) { return -ENOMEM; } @@ -957,8 +951,6 @@ retry: return 0; } -#endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */ - static void parse_chap(struct iscsi_context *iscsi, const char *target, Error **errp) { @@ -1068,7 +1060,6 @@ static char *parse_initiator_name(const char *target) return iscsi_name; } -#if defined(LIBISCSI_FEATURE_NOP_COUNTER) static void 
iscsi_nop_timed_event(void *opaque) { IscsiLun *iscsilun = opaque; @@ -1086,7 +1077,6 @@ static void iscsi_nop_timed_event(void *opaque) timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); iscsi_set_events(iscsilun); } -#endif static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp) { @@ -1113,6 +1103,7 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp) iscsilun->num_blocks = rc16->returned_lba + 1; iscsilun->lbpme = rc16->lbpme; iscsilun->lbprz = rc16->lbprz; + iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff); } } break; @@ -1224,14 +1215,12 @@ static void iscsi_attach_aio_context(BlockDriverState *bs, iscsilun->aio_context = new_context; iscsi_set_events(iscsilun); -#if defined(LIBISCSI_FEATURE_NOP_COUNTER) /* Set up a timer for sending out iSCSI NOPs */ iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context, QEMU_CLOCK_REALTIME, SCALE_MS, iscsi_nop_timed_event, iscsilun); timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); -#endif } /* @@ -1423,13 +1412,11 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) { iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran * iscsilun->block_size) >> BDRV_SECTOR_BITS; -#if defined(LIBISCSI_FEATURE_IOVECTOR) if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) { iscsilun->allocationmap = bitmap_new(DIV_ROUND_UP(bs->total_sectors, iscsilun->cluster_sectors)); } -#endif } out: @@ -1614,13 +1601,9 @@ static BlockDriver bdrv_iscsi = { .bdrv_truncate = iscsi_truncate, .bdrv_refresh_limits = iscsi_refresh_limits, -#if defined(LIBISCSI_FEATURE_IOVECTOR) .bdrv_co_get_block_status = iscsi_co_get_block_status, -#endif .bdrv_co_discard = iscsi_co_discard, -#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED) .bdrv_co_write_zeroes = iscsi_co_write_zeroes, -#endif .bdrv_co_readv = iscsi_co_readv, .bdrv_co_writev = 
iscsi_co_writev, .bdrv_co_flush_to_disk = iscsi_co_flush, @@ -334,6 +334,7 @@ tpm="no" libssh2="" vhdx="" quorum="" +numa="" # parse CC options first for opt do @@ -1118,6 +1119,10 @@ for opt do ;; --enable-quorum) quorum="yes" ;; + --disable-numa) numa="no" + ;; + --enable-numa) numa="yes" + ;; *) echo "ERROR: unknown option $opt" echo "Try '$0 --help' for more information" @@ -1384,6 +1389,8 @@ Advanced options (experts only): --enable-vhdx enable support for the Microsoft VHDX image format --disable-quorum disable quorum block filter support --enable-quorum enable quorum block filter support + --disable-numa disable libnuma support + --enable-numa enable libnuma support NOTE: The object files are built at the place where configure is launched EOF @@ -3168,6 +3175,26 @@ if compile_prog "" "" ; then fi ########################################## +# libnuma probe + +if test "$numa" != "no" ; then + cat > $TMPC << EOF +#include <numa.h> +int main(void) { return numa_available(); } +EOF + + if compile_prog "" "-lnuma" ; then + numa=yes + libs_softmmu="-lnuma $libs_softmmu" + else + if test "$numa" = "yes" ; then + feature_not_found "numa" "install numactl devel" + fi + numa=no + fi +fi + +########################################## # signalfd probe signalfd="no" cat > $TMPC << EOF @@ -3405,46 +3432,20 @@ if compile_prog "" "" ; then fi ########################################## -# Do we have libiscsi -# We check for iscsi_write16_sync() to make sure we have a -# at least version 1.4.0 of libiscsi. 
+# Do we have libiscsi >= 1.9.0 if test "$libiscsi" != "no" ; then - cat > $TMPC << EOF -#include <stdio.h> -#include <iscsi/iscsi.h> -int main(void) { iscsi_write16_sync(NULL,0,0,NULL,0,0,0,0,0,0,0); return 0; } -EOF - if $pkg_config --atleast-version=1.7.0 libiscsi; then + if $pkg_config --atleast-version=1.9.0 libiscsi; then libiscsi="yes" libiscsi_cflags=$($pkg_config --cflags libiscsi) libiscsi_libs=$($pkg_config --libs libiscsi) - elif compile_prog "" "-liscsi" ; then - libiscsi="yes" - libiscsi_libs="-liscsi" else if test "$libiscsi" = "yes" ; then - feature_not_found "libiscsi" "Install libiscsi devel" + feature_not_found "libiscsi" "Install libiscsi >= 1.9.0" fi libiscsi="no" fi fi -# We also need to know the API version because there was an -# API change from 1.4.0 to 1.5.0. -if test "$libiscsi" = "yes"; then - cat >$TMPC <<EOF -#include <iscsi/iscsi.h> -int main(void) -{ - iscsi_read10_task(0, 0, 0, 0, 0, 0, 0); - return 0; -} -EOF - if compile_prog "" "-liscsi"; then - libiscsi_version="1.4.0" - fi -fi - ########################################## # Do we need libm cat > $TMPC << EOF @@ -4218,11 +4219,7 @@ echo "nss used $smartcard_nss" echo "libusb $libusb" echo "usb net redir $usb_redir" echo "GLX support $glx" -if test "$libiscsi_version" = "1.4.0"; then -echo "libiscsi support $libiscsi (1.4.0)" -else echo "libiscsi support $libiscsi" -fi echo "libnfs support $libnfs" echo "build guest agent $guest_agent" echo "QGA VSS support $guest_agent_with_vss" @@ -4241,6 +4238,7 @@ echo "vhdx $vhdx" echo "Quorum $quorum" echo "lzo support $lzo" echo "snappy support $snappy" +echo "NUMA host support $numa" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -4515,6 +4513,9 @@ fi if test "$vhost_scsi" = "yes" ; then echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak fi +if test "$vhost_net" = "yes" ; then + echo "CONFIG_VHOST_NET_USED=y" >> $config_host_mak +fi if test "$blobs" = "yes" ; then echo 
"INSTALL_BLOBS=yes" >> $config_host_mak fi @@ -4579,9 +4580,6 @@ fi if test "$libiscsi" = "yes" ; then echo "CONFIG_LIBISCSI=m" >> $config_host_mak - if test "$libiscsi_version" = "1.4.0"; then - echo "CONFIG_LIBISCSI_1_4=y" >> $config_host_mak - fi echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak echo "LIBISCSI_LIBS=$libiscsi_libs" >> $config_host_mak fi @@ -5211,6 +5209,10 @@ if [ "$dtc_internal" = "yes" ]; then echo "config-host.h: subdir-dtc" >> $config_host_mak fi +if test "$numa" = "yes"; then + echo "CONFIG_NUMA=y" >> $config_host_mak +fi + # build tree in object directory in case the source is not in the current directory DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests" DIRS="$DIRS fsdev" @@ -347,7 +347,7 @@ void qtest_clock_warp(int64_t dest) assert(qtest_enabled()); while (clock < dest) { int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); - int64_t warp = MIN(dest - clock, deadline); + int64_t warp = qemu_soonest_timeout(dest - clock, deadline); seqlock_write_lock(&timers_state.vm_clock_seqlock); qemu_icount_bias += warp; seqlock_write_unlock(&timers_state.vm_clock_seqlock); @@ -1312,20 +1312,6 @@ static void tcg_exec_all(void) exit_request = 0; } -void set_numa_modes(void) -{ - CPUState *cpu; - int i; - - CPU_FOREACH(cpu) { - for (i = 0; i < nb_numa_nodes; i++) { - if (test_bit(cpu->cpu_index, node_cpumask[i])) { - cpu->numa_node = i; - } - } - } -} - void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg) { /* XXX: implement xxx_cpu_list for targets that still miss it */ diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 37ef90f585..8e08841760 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -44,3 +44,4 @@ CONFIG_APIC=y CONFIG_IOAPIC=y CONFIG_ICC_BUS=y CONFIG_PVPANIC=y +CONFIG_MEM_HOTPLUG=y diff --git a/default-configs/x86_64-softmmu.mak 
b/default-configs/x86_64-softmmu.mak index 31bddce4f4..66557ac590 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -44,3 +44,4 @@ CONFIG_APIC=y CONFIG_IOAPIC=y CONFIG_ICC_BUS=y CONFIG_PVPANIC=y +CONFIG_MEM_HOTPLUG=y diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt index 145402e078..019db53ec8 100644 --- a/docs/qmp/qmp-events.txt +++ b/docs/qmp/qmp-events.txt @@ -1,6 +1,16 @@ QEMU Machine Protocol Events ============================ +ACPI_DEVICE_OST +--------------- + +Emitted when guest executes ACPI _OST method. + + - data: ACPIOSTInfo type as described in qapi-schema.json + +{ "event": "ACPI_DEVICE_OST", + "data": { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0 } } + BALLOON_CHANGE -------------- diff --git a/docs/specs/acpi_mem_hotplug.txt b/docs/specs/acpi_mem_hotplug.txt new file mode 100644 index 0000000000..12909940cc --- /dev/null +++ b/docs/specs/acpi_mem_hotplug.txt @@ -0,0 +1,44 @@ +QEMU<->ACPI BIOS memory hotplug interface +-------------------------------------- + +ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add +events. + +Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access): +--------------------------------------------------------------- +0xa00: + read access: + [0x0-0x3] Lo part of memory device phys address + [0x4-0x7] Hi part of memory device phys address + [0x8-0xb] Lo part of memory device size in bytes + [0xc-0xf] Hi part of memory device size in bytes + [0x10-0x13] Memory device proximity domain + [0x14] Memory device status fields + bits: + 0: Device is enabled and may be used by guest + 1: Device insert event, used to distinguish device for which + no device check event to OSPM was issued. + It's valid only when bit 1 is set. + 2-7: reserved and should be ignored by OSPM + [0x15-0x17] reserved + + write access: + [0x0-0x3] Memory device slot selector, selects active memory device. 
+ All following accesses to other registers in 0xa00-0xa17 + region will read/store data from/to selected memory device. + [0x4-0x7] OST event code reported by OSPM + [0x8-0xb] OST status code reported by OSPM + [0xc-0x13] reserved, writes into it are ignored + [0x14] Memory device control fields + bits: + 0: reserved, OSPM must clear it before writing to register + 1: if set to 1 clears device insert event, set by OSPM + after it has emitted device check event for the + selected memory device + 2-7: reserved, OSPM must clear them before writing to register + +Selecting memory device slot beyond present range has no effect on platform: + - write accesses to memory hot-plug registers not documented above are + ignored + - read accesses to memory hot-plug registers not documented above return + all bits set to 1. diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt new file mode 100644 index 0000000000..0ea767e4b8 --- /dev/null +++ b/docs/specs/vhost-user.txt @@ -0,0 +1,266 @@ +Vhost-user Protocol +=================== + +Copyright (c) 2014 Virtual Open Systems Sarl. + +This work is licensed under the terms of the GNU GPL, version 2 or later. +See the COPYING file in the top-level directory. +=================== + +This protocol is aiming to complement the ioctl interface used to control the +vhost implementation in the Linux kernel. It implements the control plane needed +to establish virtqueue sharing with a user space process on the same host. It +uses communication over a Unix domain socket to share file descriptors in the +ancillary data of the message. + +The protocol defines 2 sides of the communication, master and slave. Master is +the application that shares its virtqueues, in our case QEMU. Slave is the +consumer of the virtqueues. + +In the current implementation QEMU is the Master, and the Slave is intended to +be a software Ethernet switch running in user space, such as Snabbswitch. + +Master and slave can be either a client (i.e. 
connecting) or server (listening) +in the socket communication. + +Message Specification +--------------------- + +Note that all numbers are in the machine native byte order. A vhost-user message +consists of 3 header fields and a payload: + +------------------------------------ +| request | flags | size | payload | +------------------------------------ + + * Request: 32-bit type of the request + * Flags: 32-bit bit field: + - Lower 2 bits are the version (currently 0x01) + - Bit 2 is the reply flag - needs to be sent on each reply from the slave + * Size - 32-bit size of the payload + + +Depending on the request type, payload can be: + + * A single 64-bit integer + ------- + | u64 | + ------- + + u64: a 64-bit unsigned integer + + * A vring state description + --------------- + | index | num | + --------------- + + Index: a 32-bit index + Num: a 32-bit number + + * A vring address description + -------------------------------------------------------------- + | index | flags | size | descriptor | used | available | log | + -------------------------------------------------------------- + + Index: a 32-bit vring index + Flags: a 32-bit vring flags + Descriptor: a 64-bit user address of the vring descriptor table + Used: a 64-bit user address of the vring used ring + Available: a 64-bit user address of the vring available ring + Log: a 64-bit guest address for logging + + * Memory regions description + --------------------------------------------------- + | num regions | padding | region0 | ... 
| region7 | + --------------------------------------------------- + + Num regions: a 32-bit number of regions + Padding: 32-bit + + A region is: + --------------------------------------- + | guest address | size | user address | + --------------------------------------- + + Guest address: a 64-bit guest address of the region + Size: a 64-bit size + User address: a 64-bit user address + + +In QEMU the vhost-user message is implemented with the following struct: + +typedef struct VhostUserMsg { + VhostUserRequest request; + uint32_t flags; + uint32_t size; + union { + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + }; +} QEMU_PACKED VhostUserMsg; + +Communication +------------- + +The protocol for vhost-user is based on the existing implementation of vhost +for the Linux Kernel. Most messages that can be sent via the Unix domain socket +implementing vhost-user have an equivalent ioctl to the kernel implementation. + +The communication consists of master sending message requests and slave sending +message replies. Most of the requests don't require replies. Here is a list of +the ones that do: + + * VHOST_USER_GET_FEATURES + * VHOST_USER_GET_VRING_BASE + +There are several messages that the master sends with file descriptors passed +in the ancillary data: + + * VHOST_USER_SET_MEM_TABLE + * VHOST_USER_SET_LOG_FD + * VHOST_USER_SET_VRING_KICK + * VHOST_USER_SET_VRING_CALL + * VHOST_USER_SET_VRING_ERR + +If Master is unable to send the full message or receives a wrong reply it will +close the connection. An optional reconnection mechanism can be implemented. + +Message types +------------- + + * VHOST_USER_GET_FEATURES + + Id: 2 + Equivalent ioctl: VHOST_GET_FEATURES + Master payload: N/A + Slave payload: u64 + + Get from the underlying vhost implementation the features bitmask. + + * VHOST_USER_SET_FEATURES + + Id: 3 + Equivalent ioctl: VHOST_SET_FEATURES + Master payload: u64 + + Enable features in the underlying vhost implementation using a bitmask.
+ + * VHOST_USER_SET_OWNER + + Id: 4 + Equivalent ioctl: VHOST_SET_OWNER + Master payload: N/A + + Issued when a new connection is established. It sets the current Master + as an owner of the session. This can be used on the Slave as a + "session start" flag. + + * VHOST_USER_RESET_OWNER + + Id: 5 + Equivalent ioctl: VHOST_RESET_OWNER + Master payload: N/A + + Issued when a connection is about to be closed. The Master will no + longer own this connection (and will usually close it). + + * VHOST_USER_SET_MEM_TABLE + + Id: 6 + Equivalent ioctl: VHOST_SET_MEM_TABLE + Master payload: memory regions description + + Sets the memory map regions on the slave so it can translate the vring + addresses. In the ancillary data there is an array of file descriptors + for each memory mapped region. The size and ordering of the fds matches + the number and ordering of memory regions. + + * VHOST_USER_SET_LOG_BASE + + Id: 7 + Equivalent ioctl: VHOST_SET_LOG_BASE + Master payload: u64 + + Sets the logging base address. + + * VHOST_USER_SET_LOG_FD + + Id: 8 + Equivalent ioctl: VHOST_SET_LOG_FD + Master payload: N/A + + Sets the logging file descriptor, which is passed as ancillary data. + + * VHOST_USER_SET_VRING_NUM + + Id: 9 + Equivalent ioctl: VHOST_SET_VRING_NUM + Master payload: vring state description + + Sets the size of the vring (its number of descriptors). + + * VHOST_USER_SET_VRING_ADDR + + Id: 10 + Equivalent ioctl: VHOST_SET_VRING_ADDR + Master payload: vring address description + Slave payload: N/A + + Sets the addresses of the different aspects of the vring. + + * VHOST_USER_SET_VRING_BASE + + Id: 11 + Equivalent ioctl: VHOST_SET_VRING_BASE + Master payload: vring state description + + Sets the base offset in the available vring. + + * VHOST_USER_GET_VRING_BASE + + Id: 12 + Equivalent ioctl: VHOST_GET_VRING_BASE + Master payload: vring state description + Slave payload: vring state description + + Get the available vring base offset.
+ + * VHOST_USER_SET_VRING_KICK + + Id: 13 + Equivalent ioctl: VHOST_SET_VRING_KICK + Master payload: u64 + + Set the event file descriptor for adding buffers to the vring. It + is passed in the ancillary data. + Bits (0-7) of the payload contain the vring index. Bit 8 is the + invalid FD flag. This flag is set when there is no file descriptor + in the ancillary data. This signals that polling should be used + instead of waiting for a kick. + + * VHOST_USER_SET_VRING_CALL + + Id: 14 + Equivalent ioctl: VHOST_SET_VRING_CALL + Master payload: u64 + + Set the event file descriptor to signal when buffers are used. It + is passed in the ancillary data. + Bits (0-7) of the payload contain the vring index. Bit 8 is the + invalid FD flag. This flag is set when there is no file descriptor + in the ancillary data. This signals that polling will be used + instead of waiting for the call. + + * VHOST_USER_SET_VRING_ERR + + Id: 15 + Equivalent ioctl: VHOST_SET_VRING_ERR + Master payload: u64 + + Set the event file descriptor to signal when an error occurs. It + is passed in the ancillary data. + Bits (0-7) of the payload contain the vring index. Bit 8 is the + invalid FD flag. This flag is set when there is no file descriptor + in the ancillary data.
@@ -70,6 +70,12 @@ AddressSpace address_space_memory; MemoryRegion io_mem_rom, io_mem_notdirty; static MemoryRegion io_mem_unassigned; +/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ +#define RAM_PREALLOC (1 << 0) + +/* RAM is mmap-ed with MAP_SHARED */ +#define RAM_SHARED (1 << 1) + #endif struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); @@ -1011,16 +1017,10 @@ static long gethugepagesize(const char *path) return fs.f_bsize; } -static sigjmp_buf sigjump; - -static void sigbus_handler(int signal) -{ - siglongjmp(sigjump, 1); -} - static void *file_ram_alloc(RAMBlock *block, ram_addr_t memory, - const char *path) + const char *path, + Error **errp) { char *filename; char *sanitized_name; @@ -1039,7 +1039,8 @@ static void *file_ram_alloc(RAMBlock *block, } if (kvm_enabled() && !kvm_has_sync_mmu()) { - fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n"); + error_setg(errp, + "host lacks kvm mmu notifiers, -mem-path unsupported"); goto error; } @@ -1056,7 +1057,8 @@ static void *file_ram_alloc(RAMBlock *block, fd = mkstemp(filename); if (fd < 0) { - perror("unable to create backing store for hugepages"); + error_setg_errno(errp, errno, + "unable to create backing store for hugepages"); g_free(filename); goto error; } @@ -1071,53 +1073,22 @@ static void *file_ram_alloc(RAMBlock *block, * If anything goes wrong with it under other filesystems, * mmap will fail. */ - if (ftruncate(fd, memory)) + if (ftruncate(fd, memory)) { perror("ftruncate"); + } - area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + area = mmap(0, memory, PROT_READ | PROT_WRITE, + (block->flags & RAM_SHARED ? 
MAP_SHARED : MAP_PRIVATE), + fd, 0); if (area == MAP_FAILED) { - perror("file_ram_alloc: can't mmap RAM pages"); + error_setg_errno(errp, errno, + "unable to map backing store for hugepages"); close(fd); goto error; } if (mem_prealloc) { - int ret, i; - struct sigaction act, oldact; - sigset_t set, oldset; - - memset(&act, 0, sizeof(act)); - act.sa_handler = &sigbus_handler; - act.sa_flags = 0; - - ret = sigaction(SIGBUS, &act, &oldact); - if (ret) { - perror("file_ram_alloc: failed to install signal handler"); - exit(1); - } - - /* unblock SIGBUS */ - sigemptyset(&set); - sigaddset(&set, SIGBUS); - pthread_sigmask(SIG_UNBLOCK, &set, &oldset); - - if (sigsetjmp(sigjump, 1)) { - fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n"); - exit(1); - } - - /* MAP_POPULATE silently ignores failures */ - for (i = 0; i < (memory/hpagesize); i++) { - memset(area + (hpagesize*i), 0, 1); - } - - ret = sigaction(SIGBUS, &oldact, NULL); - if (ret) { - perror("file_ram_alloc: failed to reinstall signal handler"); - exit(1); - } - - pthread_sigmask(SIG_SETMASK, &oldset, NULL); + os_mem_prealloc(fd, area, memory); } block->fd = fd; @@ -1129,14 +1100,6 @@ error: } return NULL; } -#else -static void *file_ram_alloc(RAMBlock *block, - ram_addr_t memory, - const char *path) -{ - fprintf(stderr, "-mem-path not supported on this host\n"); - exit(1); -} #endif static ram_addr_t find_ram_offset(ram_addr_t size) @@ -1201,17 +1164,24 @@ static void qemu_ram_setup_dump(void *addr, ram_addr_t size) } } -void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev) +static RAMBlock *find_ram_block(ram_addr_t addr) { - RAMBlock *new_block, *block; + RAMBlock *block; - new_block = NULL; QTAILQ_FOREACH(block, &ram_list.blocks, next) { if (block->offset == addr) { - new_block = block; - break; + return block; } } + + return NULL; +} + +void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev) +{ + RAMBlock *new_block = find_ram_block(addr); + RAMBlock 
*block; + assert(new_block); assert(!new_block->idstr[0]); @@ -1236,6 +1206,15 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev) qemu_mutex_unlock_ramlist(); } +void qemu_ram_unset_idstr(ram_addr_t addr) +{ + RAMBlock *block = find_ram_block(addr); + + if (block) { + memset(block->idstr, 0, sizeof(block->idstr)); + } +} + static int memory_try_enable_merging(void *addr, size_t len) { if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) { @@ -1246,56 +1225,30 @@ static int memory_try_enable_merging(void *addr, size_t len) return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE); } -ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, - MemoryRegion *mr) +static ram_addr_t ram_block_add(RAMBlock *new_block) { - RAMBlock *block, *new_block; + RAMBlock *block; ram_addr_t old_ram_size, new_ram_size; old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS; - size = TARGET_PAGE_ALIGN(size); - new_block = g_malloc0(sizeof(*new_block)); - new_block->fd = -1; - /* This assumes the iothread lock is taken here too. */ qemu_mutex_lock_ramlist(); - new_block->mr = mr; - new_block->offset = find_ram_offset(size); - if (host) { - new_block->host = host; - new_block->flags |= RAM_PREALLOC_MASK; - } else if (xen_enabled()) { - if (mem_path) { - fprintf(stderr, "-mem-path not supported with Xen\n"); - exit(1); - } - xen_ram_alloc(new_block->offset, size, mr); - } else { - if (mem_path) { - if (phys_mem_alloc != qemu_anon_ram_alloc) { - /* - * file_ram_alloc() needs to allocate just like - * phys_mem_alloc, but we haven't bothered to provide - * a hook there. 
- */ - fprintf(stderr, - "-mem-path not supported with this accelerator\n"); - exit(1); - } - new_block->host = file_ram_alloc(new_block, size, mem_path); - } - if (!new_block->host) { - new_block->host = phys_mem_alloc(size); + new_block->offset = find_ram_offset(new_block->length); + + if (!new_block->host) { + if (xen_enabled()) { + xen_ram_alloc(new_block->offset, new_block->length, new_block->mr); + } else { + new_block->host = phys_mem_alloc(new_block->length); if (!new_block->host) { fprintf(stderr, "Cannot set up guest memory '%s': %s\n", new_block->mr->name, strerror(errno)); exit(1); } - memory_try_enable_merging(new_block->host, size); + memory_try_enable_merging(new_block->host, new_block->length); } } - new_block->length = size; /* Keep the list sorted from biggest to smallest block. */ QTAILQ_FOREACH(block, &ram_list.blocks, next) { @@ -1323,18 +1276,75 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, old_ram_size, new_ram_size); } } - cpu_physical_memory_set_dirty_range(new_block->offset, size); + cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length); - qemu_ram_setup_dump(new_block->host, size); - qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE); - qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK); + qemu_ram_setup_dump(new_block->host, new_block->length); + qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE); + qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK); - if (kvm_enabled()) - kvm_setup_guest_memory(new_block->host, size); + if (kvm_enabled()) { + kvm_setup_guest_memory(new_block->host, new_block->length); + } return new_block->offset; } +#ifdef __linux__ +ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, + bool share, const char *mem_path, + Error **errp) +{ + RAMBlock *new_block; + + if (xen_enabled()) { + error_setg(errp, "-mem-path not supported with Xen"); + return -1; + } + + if (phys_mem_alloc != qemu_anon_ram_alloc) { + /* + * 
file_ram_alloc() needs to allocate just like + * phys_mem_alloc, but we haven't bothered to provide + * a hook there. + */ + error_setg(errp, + "-mem-path not supported with this accelerator"); + return -1; + } + + size = TARGET_PAGE_ALIGN(size); + new_block = g_malloc0(sizeof(*new_block)); + new_block->mr = mr; + new_block->length = size; + new_block->flags = share ? RAM_SHARED : 0; + new_block->host = file_ram_alloc(new_block, size, + mem_path, errp); + if (!new_block->host) { + g_free(new_block); + return -1; + } + + return ram_block_add(new_block); +} +#endif + +ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, + MemoryRegion *mr) +{ + RAMBlock *new_block; + + size = TARGET_PAGE_ALIGN(size); + new_block = g_malloc0(sizeof(*new_block)); + new_block->mr = mr; + new_block->length = size; + new_block->fd = -1; + new_block->host = host; + if (host) { + new_block->flags |= RAM_PREALLOC; + } + return ram_block_add(new_block); +} + ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr) { return qemu_ram_alloc_from_ptr(size, NULL, mr); @@ -1369,7 +1379,7 @@ void qemu_ram_free(ram_addr_t addr) QTAILQ_REMOVE(&ram_list.blocks, block, next); ram_list.mru_block = NULL; ram_list.version++; - if (block->flags & RAM_PREALLOC_MASK) { + if (block->flags & RAM_PREALLOC) { ; } else if (xen_enabled()) { xen_invalidate_map_cache_entry(block->host); @@ -1401,7 +1411,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) offset = addr - block->offset; if (offset < block->length) { vaddr = block->host + offset; - if (block->flags & RAM_PREALLOC_MASK) { + if (block->flags & RAM_PREALLOC) { ; } else if (xen_enabled()) { abort(); @@ -1409,12 +1419,8 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) flags = MAP_FIXED; munmap(vaddr, length); if (block->fd >= 0) { -#ifdef MAP_POPULATE - flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED : - MAP_PRIVATE; -#else - flags |= MAP_PRIVATE; -#endif + flags |= (block->flags & RAM_SHARED ? 
+ MAP_SHARED : MAP_PRIVATE); area = mmap(vaddr, length, PROT_READ | PROT_WRITE, flags, block->fd, offset); } else { @@ -1444,6 +1450,13 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) } #endif /* !_WIN32 */ +int qemu_get_ram_fd(ram_addr_t addr) +{ + RAMBlock *block = qemu_get_ram_block(addr); + + return block->fd; +} + /* Return a host pointer to ram allocated with qemu_ram_alloc. With the exception of the softmmu code in this file, this should only be used for local memory (e.g. video ram) that the device owns, @@ -1760,10 +1773,12 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base) return mmio; } -static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr) +static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as, + MemoryRegion *mr) { + assert(as); MemoryRegionSection section = { - .address_space = &address_space_memory, + .address_space = as, .mr = mr, .offset_within_address_space = 0, .offset_within_region = 0, @@ -1795,13 +1810,13 @@ static void mem_begin(MemoryListener *listener) AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1); uint16_t n; - n = dummy_section(&d->map, &io_mem_unassigned); + n = dummy_section(&d->map, as, &io_mem_unassigned); assert(n == PHYS_SECTION_UNASSIGNED); - n = dummy_section(&d->map, &io_mem_notdirty); + n = dummy_section(&d->map, as, &io_mem_notdirty); assert(n == PHYS_SECTION_NOTDIRTY); - n = dummy_section(&d->map, &io_mem_rom); + n = dummy_section(&d->map, as, &io_mem_rom); assert(n == PHYS_SECTION_ROM); - n = dummy_section(&d->map, &io_mem_watch); + n = dummy_section(&d->map, as, &io_mem_watch); assert(n == PHYS_SECTION_WATCH); d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; diff --git a/hmp-commands.hx b/hmp-commands.hx index 5f1a677b85..d0943b1ff3 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1211,7 +1211,7 @@ ETEXI { .name = "host_net_add", .args_type = "device:s,opts:s?", - .params = "tap|user|socket|vde|netmap|bridge|dump [options]", + .params = 
"tap|user|socket|vde|netmap|bridge|vhost-user|dump [options]", .help = "add host VLAN client", .mhandler.cmd = net_host_device_add, .command_completion = host_net_add_completion, @@ -1241,7 +1241,7 @@ ETEXI { .name = "netdev_add", .args_type = "netdev:O", - .params = "[user|tap|socket|vde|bridge|hubport|netmap],id=str[,prop=value][,...]", + .params = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]", .help = "add host network device", .mhandler.cmd = hmp_netdev_add, .command_completion = netdev_add_completion, @@ -22,6 +22,8 @@ #include "qemu/sockets.h" #include "monitor/monitor.h" #include "qapi/opts-visitor.h" +#include "qapi/string-output-visitor.h" +#include "qapi-visit.h" #include "ui/console.h" #include "block/qapi.h" #include "qemu-io.h" @@ -1676,3 +1678,37 @@ void hmp_object_del(Monitor *mon, const QDict *qdict) qmp_object_del(id, &err); hmp_handle_error(mon, &err); } + +void hmp_info_memdev(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + MemdevList *memdev_list = qmp_query_memdev(&err); + MemdevList *m = memdev_list; + StringOutputVisitor *ov; + int i = 0; + + + while (m) { + ov = string_output_visitor_new(false); + visit_type_uint16List(string_output_get_visitor(ov), + &m->value->host_nodes, NULL, NULL); + monitor_printf(mon, "memory device %d\n", i); + monitor_printf(mon, " size: %" PRId64 "\n", m->value->size); + monitor_printf(mon, " merge: %s\n", + m->value->merge ? "true" : "false"); + monitor_printf(mon, " dump: %s\n", + m->value->dump ? "true" : "false"); + monitor_printf(mon, " prealloc: %s\n", + m->value->prealloc ? 
"true" : "false"); + monitor_printf(mon, " policy: %s\n", + HostMemPolicy_lookup[m->value->policy]); + monitor_printf(mon, " host nodes: %s\n", + string_output_get_string(ov)); + + string_output_visitor_cleanup(ov); + m = m->next; + i++; + } + + monitor_printf(mon, "\n"); +} @@ -93,6 +93,7 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict); void hmp_cpu_add(Monitor *mon, const QDict *qdict); void hmp_object_add(Monitor *mon, const QDict *qdict); void hmp_object_del(Monitor *mon, const QDict *qdict); +void hmp_info_memdev(Monitor *mon, const QDict *qdict); void object_add_completion(ReadLineState *rs, int nb_args, const char *str); void object_del_completion(ReadLineState *rs, int nb_args, const char *str); void device_add_completion(ReadLineState *rs, int nb_args, const char *str); diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c index 9aa6725f09..5861a5b826 100644 --- a/hw/9pfs/virtio-9p.c +++ b/hw/9pfs/virtio-9p.c @@ -299,9 +299,7 @@ static int v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp) free_out: v9fs_string_free(&fidp->fs.xattr.name); free_value: - if (fidp->fs.xattr.value) { - g_free(fidp->fs.xattr.value); - } + g_free(fidp->fs.xattr.value); return retval; } diff --git a/hw/Makefile.objs b/hw/Makefile.objs index d178b65de4..52a1464051 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -29,6 +29,7 @@ devices-dirs-$(CONFIG_SOFTMMU) += usb/ devices-dirs-$(CONFIG_VIRTIO) += virtio/ devices-dirs-$(CONFIG_SOFTMMU) += watchdog/ devices-dirs-$(CONFIG_SOFTMMU) += xen/ +devices-dirs-$(CONFIG_MEM_HOTPLUG) += mem/ devices-dirs-y += core/ common-obj-y += $(devices-dirs-y) obj-y += $(devices-dirs-y) diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 397d32babd..acd2389431 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -1 +1,3 @@ common-obj-$(CONFIG_ACPI) += core.o piix4.o ich9.o pcihp.o cpu_hotplug.o +common-obj-$(CONFIG_ACPI) += memory_hotplug.o +common-obj-$(CONFIG_ACPI) += acpi_interface.o diff --git 
a/hw/acpi/acpi_interface.c b/hw/acpi/acpi_interface.c new file mode 100644 index 0000000000..c181bb2262 --- /dev/null +++ b/hw/acpi/acpi_interface.c @@ -0,0 +1,15 @@ +#include "hw/acpi/acpi_dev_interface.h" +#include "qemu/module.h" + +static void register_types(void) +{ + static const TypeInfo acpi_dev_if_info = { + .name = TYPE_ACPI_DEVICE_IF, + .parent = TYPE_INTERFACE, + .class_size = sizeof(AcpiDeviceIfClass), + }; + + type_register_static(&acpi_dev_if_info); +} + +type_init(register_types) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 407ae8900c..e7d6c77b34 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -34,6 +34,7 @@ #include "exec/address-spaces.h" #include "hw/i386/ich9.h" +#include "hw/mem/pc-dimm.h" //#define DEBUG @@ -139,6 +140,23 @@ static int ich9_pm_post_load(void *opaque, int version_id) .offset = vmstate_offset_pointer(_state, _field, uint8_t), \ } +static bool vmstate_test_use_memhp(void *opaque) +{ + ICH9LPCPMRegs *s = opaque; + return s->acpi_memory_hotplug.is_enabled; +} + +static const VMStateDescription vmstate_memhp_state = { + .name = "ich9_pm/memhp", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, ICH9LPCPMRegs), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ich9_pm = { .name = "ich9_pm", .version_id = 1, @@ -155,6 +173,13 @@ const VMStateDescription vmstate_ich9_pm = { VMSTATE_UINT32(smi_en, ICH9LPCPMRegs), VMSTATE_UINT32(smi_sts, ICH9LPCPMRegs), VMSTATE_END_OF_LIST() + }, + .subsections = (VMStateSubsection[]) { + { + .vmsd = &vmstate_memhp_state, + .needed = vmstate_test_use_memhp, + }, + VMSTATE_END_OF_LIST() } }; @@ -223,6 +248,11 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, &pm->gpe_cpu, ICH9_CPU_HOTPLUG_IO_BASE); pm->cpu_added_notifier.notify = ich9_cpu_added_req; qemu_register_cpu_added_notifier(&pm->cpu_added_notifier); + + if (pm->acpi_memory_hotplug.is_enabled) { + 
acpi_memory_hotplug_init(pci_address_space_io(lpc_pci), OBJECT(lpc_pci), + &pm->acpi_memory_hotplug); + } } static void ich9_pm_get_gpe0_blk(Object *obj, Visitor *v, @@ -235,9 +265,25 @@ static void ich9_pm_get_gpe0_blk(Object *obj, Visitor *v, visit_type_uint32(v, &value, name, errp); } +static bool ich9_pm_get_memory_hotplug_support(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + return s->pm.acpi_memory_hotplug.is_enabled; +} + +static void ich9_pm_set_memory_hotplug_support(Object *obj, bool value, + Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + s->pm.acpi_memory_hotplug.is_enabled = value; +} + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, &pm->pm_io_base, errp); @@ -246,4 +292,27 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) NULL, NULL, pm, NULL); object_property_add_uint32_ptr(obj, ACPI_PM_PROP_GPE0_BLK_LEN, &gpe0_len, errp); + object_property_add_bool(obj, "memory-hotplug-support", + ich9_pm_get_memory_hotplug_support, + ich9_pm_set_memory_hotplug_support, + NULL); +} + +void ich9_pm_device_plug_cb(ICH9LPCPMRegs *pm, DeviceState *dev, Error **errp) +{ + if (pm->acpi_memory_hotplug.is_enabled && + object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + acpi_memory_plug_cb(&pm->acpi_regs, pm->irq, &pm->acpi_memory_hotplug, + dev, errp); + } else { + error_setg(errp, "acpi: device plug request for not supported device" + " type: %s", object_get_typename(OBJECT(dev))); + } +} + +void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(adev); + + acpi_memory_ospm_status(&s->pm.acpi_memory_hotplug, list); } diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c new file mode 100644 index 0000000000..de4ddc204f --- /dev/null +++ 
b/hw/acpi/memory_hotplug.c @@ -0,0 +1,245 @@ +#include "hw/acpi/memory_hotplug.h" +#include "hw/acpi/pc-hotplug.h" +#include "hw/mem/pc-dimm.h" +#include "hw/boards.h" +#include "trace.h" +#include "qapi-visit.h" +#include "monitor/monitor.h" +#include "qapi/dealloc-visitor.h" +#include "qapi/qmp-output-visitor.h" + +static ACPIOSTInfo *acpi_memory_device_status(int slot, MemStatus *mdev) +{ + ACPIOSTInfo *info = g_new0(ACPIOSTInfo, 1); + + info->slot_type = ACPI_SLOT_TYPE_DIMM; + info->slot = g_strdup_printf("%d", slot); + info->source = mdev->ost_event; + info->status = mdev->ost_status; + if (mdev->dimm) { + DeviceState *dev = DEVICE(mdev->dimm); + if (dev->id) { + info->device = g_strdup(dev->id); + info->has_device = true; + } + } + return info; +} + +void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list) +{ + int i; + + for (i = 0; i < mem_st->dev_count; i++) { + ACPIOSTInfoList *elem = g_new0(ACPIOSTInfoList, 1); + elem->value = acpi_memory_device_status(i, &mem_st->devs[i]); + elem->next = NULL; + **list = elem; + *list = &elem->next; + } +} + +static void acpi_memory_ost_mon_event(const MemHotplugState *mem_st) +{ + Visitor *v; + QObject *out_info; + QapiDeallocVisitor *md; + QmpOutputVisitor *mo = qmp_output_visitor_new(); + MemStatus *mdev = &mem_st->devs[mem_st->selector]; + ACPIOSTInfo *info = acpi_memory_device_status(mem_st->selector, mdev); + + v = qmp_output_get_visitor(mo); + visit_type_ACPIOSTInfo(v, &info, "unused", NULL); + + out_info = qmp_output_get_qobject(mo); + monitor_protocol_event(QEVENT_ACPI_OST, out_info); + qobject_decref(out_info); + + qmp_output_visitor_cleanup(mo); + md = qapi_dealloc_visitor_new(); + v = qapi_dealloc_get_visitor(md); + visit_type_ACPIOSTInfo(v, &info, "unused", NULL); + qapi_dealloc_visitor_cleanup(md); +} + +static uint64_t acpi_memory_hotplug_read(void *opaque, hwaddr addr, + unsigned int size) +{ + uint32_t val = 0; + MemHotplugState *mem_st = opaque; + MemStatus *mdev; + Object *o; + + 
if (mem_st->selector >= mem_st->dev_count) { + trace_mhp_acpi_invalid_slot_selected(mem_st->selector); + return 0; + } + + mdev = &mem_st->devs[mem_st->selector]; + o = OBJECT(mdev->dimm); + switch (addr) { + case 0x0: /* Lo part of phys address where DIMM is mapped */ + val = o ? object_property_get_int(o, PC_DIMM_ADDR_PROP, NULL) : 0; + trace_mhp_acpi_read_addr_lo(mem_st->selector, val); + break; + case 0x4: /* Hi part of phys address where DIMM is mapped */ + val = o ? object_property_get_int(o, PC_DIMM_ADDR_PROP, NULL) >> 32 : 0; + trace_mhp_acpi_read_addr_hi(mem_st->selector, val); + break; + case 0x8: /* Lo part of DIMM size */ + val = o ? object_property_get_int(o, PC_DIMM_SIZE_PROP, NULL) : 0; + trace_mhp_acpi_read_size_lo(mem_st->selector, val); + break; + case 0xc: /* Hi part of DIMM size */ + val = o ? object_property_get_int(o, PC_DIMM_SIZE_PROP, NULL) >> 32 : 0; + trace_mhp_acpi_read_size_hi(mem_st->selector, val); + break; + case 0x10: /* node proximity for _PXM method */ + val = o ? object_property_get_int(o, PC_DIMM_NODE_PROP, NULL) : 0; + trace_mhp_acpi_read_pxm(mem_st->selector, val); + break; + case 0x14: /* pack and return is_* fields */ + val |= mdev->is_enabled ? 1 : 0; + val |= mdev->is_inserting ? 
2 : 0; + trace_mhp_acpi_read_flags(mem_st->selector, val); + break; + default: + val = ~0; + break; + } + return val; +} + +static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + MemHotplugState *mem_st = opaque; + MemStatus *mdev; + + if (!mem_st->dev_count) { + return; + } + + if (addr) { + if (mem_st->selector >= mem_st->dev_count) { + trace_mhp_acpi_invalid_slot_selected(mem_st->selector); + return; + } + } + + switch (addr) { + case 0x0: /* DIMM slot selector */ + mem_st->selector = data; + trace_mhp_acpi_write_slot(mem_st->selector); + break; + case 0x4: /* _OST event */ + mdev = &mem_st->devs[mem_st->selector]; + if (data == 1) { + /* TODO: handle device insert OST event */ + } else if (data == 3) { + /* TODO: handle device remove OST event */ + } + mdev->ost_event = data; + trace_mhp_acpi_write_ost_ev(mem_st->selector, mdev->ost_event); + break; + case 0x8: /* _OST status */ + mdev = &mem_st->devs[mem_st->selector]; + mdev->ost_status = data; + trace_mhp_acpi_write_ost_status(mem_st->selector, mdev->ost_status); + /* TODO: implement memory removal on guest signal */ + acpi_memory_ost_mon_event(mem_st); + break; + case 0x14: + mdev = &mem_st->devs[mem_st->selector]; + if (data & 2) { /* clear insert event */ + mdev->is_inserting = false; + trace_mhp_acpi_clear_insert_evt(mem_st->selector); + } + break; + } + +} +static const MemoryRegionOps acpi_memory_hotplug_ops = { + .read = acpi_memory_hotplug_read, + .write = acpi_memory_hotplug_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + }, +}; + +void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner, + MemHotplugState *state) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + + state->dev_count = machine->ram_slots; + if (!state->dev_count) { + return; + } + + state->devs = g_malloc0(sizeof(*state->devs) * state->dev_count); + memory_region_init_io(&state->io, owner, 
&acpi_memory_hotplug_ops, state, + "acpi-mem-hotplug", ACPI_MEMORY_HOTPLUG_IO_LEN); + memory_region_add_subregion(as, ACPI_MEMORY_HOTPLUG_BASE, &state->io); +} + +void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st, + DeviceState *dev, Error **errp) +{ + MemStatus *mdev; + Error *local_err = NULL; + int slot = object_property_get_int(OBJECT(dev), "slot", &local_err); + + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (slot >= mem_st->dev_count) { + char *dev_path = object_get_canonical_path(OBJECT(dev)); + error_setg(errp, "acpi_memory_plug_cb: " + "device [%s] returned invalid memory slot[%d]", + dev_path, slot); + g_free(dev_path); + return; + } + + mdev = &mem_st->devs[slot]; + mdev->dimm = dev; + mdev->is_enabled = true; + mdev->is_inserting = true; + + /* do ACPI magic */ + ar->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS; + acpi_update_sci(ar, irq); + return; +} + +static const VMStateDescription vmstate_memhp_sts = { + .name = "memory hotplug device state", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_BOOL(is_enabled, MemStatus), + VMSTATE_BOOL(is_inserting, MemStatus), + VMSTATE_UINT32(ost_event, MemStatus), + VMSTATE_UINT32(ost_status, MemStatus), + VMSTATE_END_OF_LIST() + } +}; + +const VMStateDescription vmstate_memory_hotplug = { + .name = "memory hotplug state", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(selector, MemHotplugState), + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(devs, MemHotplugState, dev_count, + vmstate_memhp_sts, MemStatus), + VMSTATE_END_OF_LIST() + } +}; diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 252bbf2c77..b72b34e5c9 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -33,6 +33,9 @@ #include "hw/acpi/pcihp.h" #include "hw/acpi/cpu_hotplug.h" #include "hw/hotplug.h" +#include "hw/mem/pc-dimm.h" +#include 
"hw/acpi/memory_hotplug.h" +#include "hw/acpi/acpi_dev_interface.h" //#define DEBUG @@ -81,6 +84,8 @@ typedef struct PIIX4PMState { AcpiCpuHotplug gpe_cpu; Notifier cpu_added_notifier; + + MemHotplugState acpi_memory_hotplug; } PIIX4PMState; #define TYPE_PIIX4_PM "PIIX4_PM" @@ -244,6 +249,23 @@ static bool vmstate_test_no_use_acpi_pci_hotplug(void *opaque, int version_id) return !s->use_acpi_pci_hotplug; } +static bool vmstate_test_use_memhp(void *opaque) +{ + PIIX4PMState *s = opaque; + return s->acpi_memory_hotplug.is_enabled; +} + +static const VMStateDescription vmstate_memhp_state = { + .name = "piix4_pm/memhp", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, PIIX4PMState), + VMSTATE_END_OF_LIST() + } +}; + /* qemu-kvm 1.2 uses version 3 but advertised as 2 * To support incoming qemu-kvm 1.2 migration, change version_id * and minimum_version_id to 2 below (which breaks migration from @@ -275,6 +297,13 @@ static const VMStateDescription vmstate_acpi = { VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, PIIX4PMState, vmstate_test_use_acpi_pci_hotplug), VMSTATE_END_OF_LIST() + }, + .subsections = (VMStateSubsection[]) { + { + .vmsd = &vmstate_memhp_state, + .needed = vmstate_test_use_memhp, + }, + VMSTATE_END_OF_LIST() } }; @@ -308,19 +337,35 @@ static void piix4_pm_powerdown_req(Notifier *n, void *opaque) acpi_pm1_evt_power_down(&s->ar); } -static void piix4_pci_device_plug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +static void piix4_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { PIIX4PMState *s = PIIX4_PM(hotplug_dev); - acpi_pcihp_device_plug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev, errp); + + if (s->acpi_memory_hotplug.is_enabled && + object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + acpi_memory_plug_cb(&s->ar, s->irq, &s->acpi_memory_hotplug, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), 
TYPE_PCI_DEVICE)) { + acpi_pcihp_device_plug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev, + errp); + } else { + error_setg(errp, "acpi: device plug request for not supported device" + " type: %s", object_get_typename(OBJECT(dev))); + } } -static void piix4_pci_device_unplug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { PIIX4PMState *s = PIIX4_PM(hotplug_dev); - acpi_pcihp_device_unplug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev, - errp); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_unplug_cb(&s->ar, s->irq, &s->acpi_pci_hotplug, dev, + errp); + } else { + error_setg(errp, "acpi: device unplug request for not supported device" + " type: %s", object_get_typename(OBJECT(dev))); + } } static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque) @@ -439,13 +484,17 @@ Object *piix4_pm_find(void) I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qemu_irq sci_irq, qemu_irq smi_irq, - int kvm_enabled, FWCfgState *fw_cfg) + int kvm_enabled, FWCfgState *fw_cfg, + DeviceState **piix4_pm) { DeviceState *dev; PIIX4PMState *s; dev = DEVICE(pci_create(bus, devfn, TYPE_PIIX4_PM)); qdev_prop_set_uint32(dev, "smb_io_base", smb_io_base); + if (piix4_pm) { + *piix4_pm = dev; + } s = PIIX4_PM(dev); s->irq = sci_irq; @@ -518,6 +567,17 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent, PIIX4_CPU_HOTPLUG_IO_BASE); s->cpu_added_notifier.notify = piix4_cpu_added_req; qemu_register_cpu_added_notifier(&s->cpu_added_notifier); + + if (s->acpi_memory_hotplug.is_enabled) { + acpi_memory_hotplug_init(parent, OBJECT(s), &s->acpi_memory_hotplug); + } +} + +static void piix4_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) +{ + PIIX4PMState *s = PIIX4_PM(adev); + + acpi_memory_ospm_status(&s->acpi_memory_hotplug, list); } static Property piix4_pm_properties[] = { @@ -527,6 +587,8 @@ static 
Property piix4_pm_properties[] = { DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), + DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState, + acpi_memory_hotplug.is_enabled, true), DEFINE_PROP_END_OF_LIST(), }; @@ -535,6 +597,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(klass); k->init = piix4_pm_initfn; k->config_write = pm_write_config; @@ -551,8 +614,9 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) */ dc->cannot_instantiate_with_device_add_yet = true; dc->hotpluggable = false; - hc->plug = piix4_pci_device_plug_cb; - hc->unplug = piix4_pci_device_unplug_cb; + hc->plug = piix4_device_plug_cb; + hc->unplug = piix4_device_unplug_cb; + adevc->ospm_status = piix4_ospm_status; } static const TypeInfo piix4_pm_info = { @@ -562,6 +626,7 @@ static const TypeInfo piix4_pm_info = { .class_init = piix4_pm_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, + { TYPE_ACPI_DEVICE_IF }, { } } }; diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c index 5455dbf326..45e75081e8 100644 --- a/hw/arm/spitz.c +++ b/hw/arm/spitz.c @@ -285,9 +285,9 @@ static void spitz_keyboard_keydown(SpitzKeyboardState *s, int keycode) spitz_keyboard_sense_update(s); } -#define MOD_SHIFT (1 << 7) -#define MOD_CTRL (1 << 8) -#define MOD_FN (1 << 9) +#define SPITZ_MOD_SHIFT (1 << 7) +#define SPITZ_MOD_CTRL (1 << 8) +#define SPITZ_MOD_FN (1 << 9) #define QUEUE_KEY(c) s->fifo[(s->fifopos + s->fifolen ++) & 0xf] = c @@ -324,21 +324,26 @@ static void spitz_keyboard_handler(void *opaque, int keycode) } code = s->pre_map[mapcode = ((s->modifiers & 3) ? 
- (keycode | MOD_SHIFT) : - (keycode & ~MOD_SHIFT))]; + (keycode | SPITZ_MOD_SHIFT) : + (keycode & ~SPITZ_MOD_SHIFT))]; if (code != mapcode) { #if 0 - if ((code & MOD_SHIFT) && !(s->modifiers & 1)) + if ((code & SPITZ_MOD_SHIFT) && !(s->modifiers & 1)) { QUEUE_KEY(0x2a | (keycode & 0x80)); - if ((code & MOD_CTRL ) && !(s->modifiers & 4)) + } + if ((code & SPITZ_MOD_CTRL) && !(s->modifiers & 4)) { QUEUE_KEY(0x1d | (keycode & 0x80)); - if ((code & MOD_FN ) && !(s->modifiers & 8)) + } + if ((code & SPITZ_MOD_FN) && !(s->modifiers & 8)) { QUEUE_KEY(0x38 | (keycode & 0x80)); - if ((code & MOD_FN ) && (s->modifiers & 1)) + } + if ((code & SPITZ_MOD_FN) && (s->modifiers & 1)) { QUEUE_KEY(0x2a | (~keycode & 0x80)); - if ((code & MOD_FN ) && (s->modifiers & 2)) + } + if ((code & SPITZ_MOD_FN) && (s->modifiers & 2)) { QUEUE_KEY(0x36 | (~keycode & 0x80)); + } #else if (keycode & 0x80) { if ((s->imodifiers & 1 ) && !(s->modifiers & 1)) @@ -353,24 +358,27 @@ static void spitz_keyboard_handler(void *opaque, int keycode) QUEUE_KEY(0x36); s->imodifiers = 0; } else { - if ((code & MOD_SHIFT) && !((s->modifiers | s->imodifiers) & 1)) { + if ((code & SPITZ_MOD_SHIFT) && + !((s->modifiers | s->imodifiers) & 1)) { QUEUE_KEY(0x2a); s->imodifiers |= 1; } - if ((code & MOD_CTRL ) && !((s->modifiers | s->imodifiers) & 4)) { + if ((code & SPITZ_MOD_CTRL) && + !((s->modifiers | s->imodifiers) & 4)) { QUEUE_KEY(0x1d); s->imodifiers |= 4; } - if ((code & MOD_FN ) && !((s->modifiers | s->imodifiers) & 8)) { + if ((code & SPITZ_MOD_FN) && + !((s->modifiers | s->imodifiers) & 8)) { QUEUE_KEY(0x38); s->imodifiers |= 8; } - if ((code & MOD_FN ) && (s->modifiers & 1) && + if ((code & SPITZ_MOD_FN) && (s->modifiers & 1) && !(s->imodifiers & 0x10)) { QUEUE_KEY(0x2a | 0x80); s->imodifiers |= 0x10; } - if ((code & MOD_FN ) && (s->modifiers & 2) && + if ((code & SPITZ_MOD_FN) && (s->modifiers & 2) && !(s->imodifiers & 0x20)) { QUEUE_KEY(0x36 | 0x80); s->imodifiers |= 0x20; @@ -402,38 +410,38 @@ static 
void spitz_keyboard_pre_map(SpitzKeyboardState *s) int i; for (i = 0; i < 0x100; i ++) s->pre_map[i] = i; - s->pre_map[0x02 | MOD_SHIFT ] = 0x02 | MOD_SHIFT; /* exclam */ - s->pre_map[0x28 | MOD_SHIFT ] = 0x03 | MOD_SHIFT; /* quotedbl */ - s->pre_map[0x04 | MOD_SHIFT ] = 0x04 | MOD_SHIFT; /* numbersign */ - s->pre_map[0x05 | MOD_SHIFT ] = 0x05 | MOD_SHIFT; /* dollar */ - s->pre_map[0x06 | MOD_SHIFT ] = 0x06 | MOD_SHIFT; /* percent */ - s->pre_map[0x08 | MOD_SHIFT ] = 0x07 | MOD_SHIFT; /* ampersand */ - s->pre_map[0x28 ] = 0x08 | MOD_SHIFT; /* apostrophe */ - s->pre_map[0x0a | MOD_SHIFT ] = 0x09 | MOD_SHIFT; /* parenleft */ - s->pre_map[0x0b | MOD_SHIFT ] = 0x0a | MOD_SHIFT; /* parenright */ - s->pre_map[0x29 | MOD_SHIFT ] = 0x0b | MOD_SHIFT; /* asciitilde */ - s->pre_map[0x03 | MOD_SHIFT ] = 0x0c | MOD_SHIFT; /* at */ - s->pre_map[0xd3 ] = 0x0e | MOD_FN; /* Delete */ - s->pre_map[0x3a ] = 0x0f | MOD_FN; /* Caps_Lock */ - s->pre_map[0x07 | MOD_SHIFT ] = 0x11 | MOD_FN; /* asciicircum */ - s->pre_map[0x0d ] = 0x12 | MOD_FN; /* equal */ - s->pre_map[0x0d | MOD_SHIFT ] = 0x13 | MOD_FN; /* plus */ - s->pre_map[0x1a ] = 0x14 | MOD_FN; /* bracketleft */ - s->pre_map[0x1b ] = 0x15 | MOD_FN; /* bracketright */ - s->pre_map[0x1a | MOD_SHIFT ] = 0x16 | MOD_FN; /* braceleft */ - s->pre_map[0x1b | MOD_SHIFT ] = 0x17 | MOD_FN; /* braceright */ - s->pre_map[0x27 ] = 0x22 | MOD_FN; /* semicolon */ - s->pre_map[0x27 | MOD_SHIFT ] = 0x23 | MOD_FN; /* colon */ - s->pre_map[0x09 | MOD_SHIFT ] = 0x24 | MOD_FN; /* asterisk */ - s->pre_map[0x2b ] = 0x25 | MOD_FN; /* backslash */ - s->pre_map[0x2b | MOD_SHIFT ] = 0x26 | MOD_FN; /* bar */ - s->pre_map[0x0c | MOD_SHIFT ] = 0x30 | MOD_FN; /* underscore */ - s->pre_map[0x33 | MOD_SHIFT ] = 0x33 | MOD_FN; /* less */ - s->pre_map[0x35 ] = 0x33 | MOD_SHIFT; /* slash */ - s->pre_map[0x34 | MOD_SHIFT ] = 0x34 | MOD_FN; /* greater */ - s->pre_map[0x35 | MOD_SHIFT ] = 0x34 | MOD_SHIFT; /* question */ - s->pre_map[0x49 ] = 0x48 | MOD_FN; /* Page_Up */ 
- s->pre_map[0x51 ] = 0x50 | MOD_FN; /* Page_Down */ + s->pre_map[0x02 | SPITZ_MOD_SHIFT] = 0x02 | SPITZ_MOD_SHIFT; /* exclam */ + s->pre_map[0x28 | SPITZ_MOD_SHIFT] = 0x03 | SPITZ_MOD_SHIFT; /* quotedbl */ + s->pre_map[0x04 | SPITZ_MOD_SHIFT] = 0x04 | SPITZ_MOD_SHIFT; /* # */ + s->pre_map[0x05 | SPITZ_MOD_SHIFT] = 0x05 | SPITZ_MOD_SHIFT; /* dollar */ + s->pre_map[0x06 | SPITZ_MOD_SHIFT] = 0x06 | SPITZ_MOD_SHIFT; /* percent */ + s->pre_map[0x08 | SPITZ_MOD_SHIFT] = 0x07 | SPITZ_MOD_SHIFT; /* ampersand */ + s->pre_map[0x28] = 0x08 | SPITZ_MOD_SHIFT; /* ' */ + s->pre_map[0x0a | SPITZ_MOD_SHIFT] = 0x09 | SPITZ_MOD_SHIFT; /* ( */ + s->pre_map[0x0b | SPITZ_MOD_SHIFT] = 0x0a | SPITZ_MOD_SHIFT; /* ) */ + s->pre_map[0x29 | SPITZ_MOD_SHIFT] = 0x0b | SPITZ_MOD_SHIFT; /* tilde */ + s->pre_map[0x03 | SPITZ_MOD_SHIFT] = 0x0c | SPITZ_MOD_SHIFT; /* at */ + s->pre_map[0xd3] = 0x0e | SPITZ_MOD_FN; /* Delete */ + s->pre_map[0x3a] = 0x0f | SPITZ_MOD_FN; /* Caps_Lock */ + s->pre_map[0x07 | SPITZ_MOD_SHIFT] = 0x11 | SPITZ_MOD_FN; /* ^ */ + s->pre_map[0x0d] = 0x12 | SPITZ_MOD_FN; /* equal */ + s->pre_map[0x0d | SPITZ_MOD_SHIFT] = 0x13 | SPITZ_MOD_FN; /* plus */ + s->pre_map[0x1a] = 0x14 | SPITZ_MOD_FN; /* [ */ + s->pre_map[0x1b] = 0x15 | SPITZ_MOD_FN; /* ] */ + s->pre_map[0x1a | SPITZ_MOD_SHIFT] = 0x16 | SPITZ_MOD_FN; /* { */ + s->pre_map[0x1b | SPITZ_MOD_SHIFT] = 0x17 | SPITZ_MOD_FN; /* } */ + s->pre_map[0x27] = 0x22 | SPITZ_MOD_FN; /* semicolon */ + s->pre_map[0x27 | SPITZ_MOD_SHIFT] = 0x23 | SPITZ_MOD_FN; /* colon */ + s->pre_map[0x09 | SPITZ_MOD_SHIFT] = 0x24 | SPITZ_MOD_FN; /* asterisk */ + s->pre_map[0x2b] = 0x25 | SPITZ_MOD_FN; /* backslash */ + s->pre_map[0x2b | SPITZ_MOD_SHIFT] = 0x26 | SPITZ_MOD_FN; /* bar */ + s->pre_map[0x0c | SPITZ_MOD_SHIFT] = 0x30 | SPITZ_MOD_FN; /* _ */ + s->pre_map[0x33 | SPITZ_MOD_SHIFT] = 0x33 | SPITZ_MOD_FN; /* less */ + s->pre_map[0x35] = 0x33 | SPITZ_MOD_SHIFT; /* slash */ + s->pre_map[0x34 | SPITZ_MOD_SHIFT] = 0x34 | SPITZ_MOD_FN; /* greater */ + 
s->pre_map[0x35 | SPITZ_MOD_SHIFT] = 0x34 | SPITZ_MOD_SHIFT; /* question */ + s->pre_map[0x49] = 0x48 | SPITZ_MOD_FN; /* Page_Up */ + s->pre_map[0x51] = 0x50 | SPITZ_MOD_FN; /* Page_Down */ s->modifiers = 0; s->imodifiers = 0; @@ -441,9 +449,9 @@ static void spitz_keyboard_pre_map(SpitzKeyboardState *s) s->fifolen = 0; } -#undef MOD_SHIFT -#undef MOD_CTRL -#undef MOD_FN +#undef SPITZ_MOD_SHIFT +#undef SPITZ_MOD_CTRL +#undef SPITZ_MOD_FN static int spitz_keyboard_post_load(void *opaque, int version_id) { diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c index f311595d67..3d83e6c98d 100644 --- a/hw/arm/vexpress.c +++ b/hw/arm/vexpress.c @@ -533,7 +533,15 @@ static void vexpress_common_init(VEDBoardInfo *daughterboard, * If a bios file was provided, attempt to map it into memory */ if (bios_name) { - const char *fn = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + const char *fn; + + if (drive_get(IF_PFLASH, 0, 0)) { + error_report("The contents of the first flash device may be " + "specified with -bios or with -drive if=pflash... 
" + "but you cannot use both options at once"); + exit(1); + } + fn = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); if (!fn || load_image_targphys(fn, map[VE_NORFLASH0], VEXPRESS_FLASH_SIZE) < 0) { error_report("Could not load ROM image '%s'", bios_name); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 3b55a4bf7d..72fe030e93 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -180,10 +180,23 @@ static void create_fdt(VirtBoardInfo *vbi) "clk24mhz"); qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", vbi->clock_phandle); +} + +static void fdt_add_psci_node(const VirtBoardInfo *vbi) +{ + void *fdt = vbi->fdt; + ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0)); + /* No PSCI for TCG yet */ if (kvm_enabled()) { qemu_fdt_add_subnode(fdt, "/psci"); - qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci"); + if (armcpu->psci_version == 2) { + const char comp[] = "arm,psci-0.2\0arm,psci"; + qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp)); + } else { + qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci"); + } + qemu_fdt_setprop_string(fdt, "/psci", "method", "hvc"); qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend", PSCI_FN_CPU_SUSPEND); @@ -446,6 +459,7 @@ static void machvirt_init(MachineState *machine) object_property_set_bool(cpuobj, true, "realized", NULL); } fdt_add_cpu_nodes(vbi); + fdt_add_psci_node(vbi); memory_region_init_ram(ram, NULL, "mach-virt.ram", machine->ram_size); vmstate_register_ram_global(ram); diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 0c95d53dca..f9507b4e5a 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -748,9 +748,18 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) pflash_t *pfl = CFI_PFLASH01(dev); uint64_t total_len; int ret; + uint64_t blocks_per_device, device_len; + int num_devices; total_len = pfl->sector_len * pfl->nb_blocs; + /* These are only used to expose the parameters of each device + * in the cfi_table[]. 
+ */ + num_devices = pfl->device_width ? (pfl->bank_width / pfl->device_width) : 1; + blocks_per_device = pfl->nb_blocs / num_devices; + device_len = pfl->sector_len * blocks_per_device; + /* XXX: to be fixed */ #if 0 if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) && @@ -838,7 +847,7 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) /* Max timeout for chip erase */ pfl->cfi_table[0x26] = 0x00; /* Device size */ - pfl->cfi_table[0x27] = ctz32(total_len); // + 1; + pfl->cfi_table[0x27] = ctz32(device_len); /* + 1; */ /* Flash device interface (8 & 16 bits) */ pfl->cfi_table[0x28] = 0x02; pfl->cfi_table[0x29] = 0x00; @@ -854,8 +863,8 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) /* Number of erase block regions (uniform) */ pfl->cfi_table[0x2C] = 0x01; /* Erase block region 1 */ - pfl->cfi_table[0x2D] = pfl->nb_blocs - 1; - pfl->cfi_table[0x2E] = (pfl->nb_blocs - 1) >> 8; + pfl->cfi_table[0x2D] = blocks_per_device - 1; + pfl->cfi_table[0x2E] = (blocks_per_device - 1) >> 8; pfl->cfi_table[0x2F] = pfl->sector_len >> 8; pfl->cfi_table[0x30] = pfl->sector_len >> 16; @@ -882,6 +891,11 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) static Property pflash_cfi01_properties[] = { DEFINE_PROP_DRIVE("drive", struct pflash_t, bs), + /* num-blocks is the number of blocks actually visible to the guest, + * i.e. the total size of the device divided by the sector length. + * If we're emulating flash devices wired in parallel the actual + * number of blocks per individual device will differ. + */ DEFINE_PROP_UINT32("num-blocks", struct pflash_t, nb_blocs, 0), DEFINE_PROP_UINT64("sector-length", struct pflash_t, sector_len, 0), /* width here is the overall width of this QEMU device in bytes. 
diff --git a/hw/core/qdev.c b/hw/core/qdev.c index e65a5aa3a8..b9cd4fc814 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -34,6 +34,7 @@ #include "qapi/qmp/qjson.h" #include "monitor/monitor.h" #include "hw/hotplug.h" +#include "hw/boards.h" int qdev_hotplug = 0; static bool qdev_hot_added = false; @@ -567,32 +568,35 @@ static void bus_set_realized(Object *obj, bool value, Error **errp) { BusState *bus = BUS(obj); BusClass *bc = BUS_GET_CLASS(bus); + BusChild *kid; Error *local_err = NULL; if (value && !bus->realized) { if (bc->realize) { bc->realize(bus, &local_err); + } + /* TODO: recursive realization */ + } else if (!value && bus->realized) { + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + object_property_set_bool(OBJECT(dev), false, "realized", + &local_err); if (local_err != NULL) { - goto error; + break; } - } - } else if (!value && bus->realized) { - if (bc->unrealize) { + if (bc->unrealize && local_err == NULL) { bc->unrealize(bus, &local_err); - - if (local_err != NULL) { - goto error; - } } } - bus->realized = value; - return; + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } -error: - error_propagate(errp, local_err); + bus->realized = value; } void qbus_create_inplace(void *bus, size_t size, const char *typename, @@ -813,6 +817,18 @@ static void device_set_realized(Object *obj, bool value, Error **errp) local_err == NULL) { hotplug_handler_plug(dev->parent_bus->hotplug_handler, dev, &local_err); + } else if (local_err == NULL && + object_dynamic_cast(qdev_get_machine(), TYPE_MACHINE)) { + HotplugHandler *hotplug_ctrl; + MachineState *machine = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(machine); + + if (mc->get_hotplug_handler) { + hotplug_ctrl = mc->get_hotplug_handler(machine, dev); + if (hotplug_ctrl) { + hotplug_handler_plug(hotplug_ctrl, dev, &local_err); + } + } } if (qdev_get_vmsd(dev) && local_err == NULL) { @@ -865,6 +881,20 @@ static bool 
device_get_hotpluggable(Object *obj, Error **errp) dev->parent_bus->allow_hotplug); } +static bool device_get_hotplugged(Object *obj, Error **err) +{ + DeviceState *dev = DEVICE(obj); + + return dev->hotplugged; +} + +static void device_set_hotplugged(Object *obj, bool value, Error **err) +{ + DeviceState *dev = DEVICE(obj); + + dev->hotplugged = value; +} + static void device_initfn(Object *obj) { DeviceState *dev = DEVICE(obj); @@ -883,6 +913,9 @@ static void device_initfn(Object *obj) device_get_realized, device_set_realized, NULL); object_property_add_bool(obj, "hotpluggable", device_get_hotpluggable, NULL, NULL); + object_property_add_bool(obj, "hotplugged", + device_get_hotplugged, device_set_hotplugged, + &error_abort); class = object_get_class(OBJECT(dev)); do { diff --git a/hw/display/qxl.c b/hw/display/qxl.c index 736fd3c4e2..d43aa49eb8 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -710,7 +710,7 @@ static void interface_release_resource(QXLInstance *sin, if (ext.group_id == MEMSLOT_GROUP_HOST) { /* host group -> vga mode update request */ - QXLCommandExt *cmdext = (void *)(ext.info->id); + QXLCommandExt *cmdext = (void *)(intptr_t)(ext.info->id); SimpleSpiceUpdate *update; g_assert(cmdext->cmd.type == QXL_CMD_DRAW); update = container_of(cmdext, SimpleSpiceUpdate, ext); diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index f66c349508..48014abf0a 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -9,7 +9,8 @@ obj-y += acpi-build.o obj-y += bios-linker-loader.o hw/i386/acpi-build.o: hw/i386/acpi-build.c hw/i386/acpi-dsdt.hex \ hw/i386/ssdt-proc.hex hw/i386/ssdt-pcihp.hex hw/i386/ssdt-misc.hex \ - hw/i386/acpi-dsdt.hex hw/i386/q35-acpi-dsdt.hex + hw/i386/acpi-dsdt.hex hw/i386/q35-acpi-dsdt.hex \ + hw/i386/q35-acpi-dsdt.hex hw/i386/ssdt-mem.hex iasl-option=$(shell if test -z "`$(1) $(2) 2>&1 > /dev/null`" \ ; then echo "$(2)"; else echo "$(3)"; fi ;) @@ -17,7 +18,7 @@ iasl-option=$(shell if test -z "`$(1) $(2) 2>&1 > 
/dev/null`" \ ifdef IASL #IASL Present. Generate hex files from .dsl hw/i386/%.hex: $(SRC_PATH)/hw/i386/%.dsl $(SRC_PATH)/scripts/acpi_extract_preprocess.py $(SRC_PATH)/scripts/acpi_extract.py - $(call quiet-command, cpp -P $(QEMU_DGFLAGS) $(QEMU_INCLUDES) $< -o $*.dsl.i.orig, " CPP $(TARGET_DIR)$*.dsl.i.orig") + $(call quiet-command, $(CPP) -x c -P $(QEMU_DGFLAGS) $(QEMU_INCLUDES) $< -o $*.dsl.i.orig, " CPP $(TARGET_DIR)$*.dsl.i.orig") $(call quiet-command, $(PYTHON) $(SRC_PATH)/scripts/acpi_extract_preprocess.py $*.dsl.i.orig > $*.dsl.i, " ACPI_PREPROCESS $(TARGET_DIR)$*.dsl.i") $(call quiet-command, $(IASL) $(call iasl-option,$(IASL),-Pn,) -vs -l -tc -p $* $*.dsl.i $(if $(V), , > /dev/null) 2>&1 ," IASL $(TARGET_DIR)$*.dsl.i") $(call quiet-command, $(PYTHON) $(SRC_PATH)/scripts/acpi_extract.py $*.lst > $*.off, " ACPI_EXTRACT $(TARGET_DIR)$*.off") diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 1e0aa09bc8..ebc5f034e3 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -37,6 +37,7 @@ #include "bios-linker-loader.h" #include "hw/loader.h" #include "hw/isa/isa.h" +#include "hw/acpi/memory_hotplug.h" /* Supported chipsets: */ #include "hw/acpi/piix4.h" @@ -667,6 +668,14 @@ static inline char acpi_get_hex(uint32_t val) #define ACPI_PCIQXL_SIZEOF (*ssdt_pciqxl_end - *ssdt_pciqxl_start) #define ACPI_PCIQXL_AML (ssdp_pcihp_aml + *ssdt_pciqxl_start) +#include "hw/i386/ssdt-mem.hex" + +/* 0x5B 0x82 DeviceOp PkgLength NameString DimmID */ +#define ACPI_MEM_OFFSET_HEX (*ssdt_mem_name - *ssdt_mem_start + 2) +#define ACPI_MEM_OFFSET_ID (*ssdt_mem_id - *ssdt_mem_start + 7) +#define ACPI_MEM_SIZEOF (*ssdt_mem_end - *ssdt_mem_start) +#define ACPI_MEM_AML (ssdm_mem_aml + *ssdt_mem_start) + #define ACPI_SSDT_SIGNATURE 0x54445353 /* SSDT */ #define ACPI_SSDT_HEADER_LENGTH 36 @@ -1003,6 +1012,8 @@ build_ssdt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu, AcpiPmInfo *pm, AcpiMiscInfo *misc, PcPciInfo *pci, PcGuestInfo *guest_info) { + MachineState 
*machine = MACHINE(qdev_get_machine()); + uint32_t nr_mem = machine->ram_slots; unsigned acpi_cpus = guest_info->apic_id_limit; int ssdt_start = table_data->len; uint8_t *ssdt_ptr; @@ -1031,6 +1042,9 @@ build_ssdt(GArray *table_data, GArray *linker, ACPI_BUILD_SET_LE(ssdt_ptr, sizeof(ssdp_misc_aml), ssdt_isa_pest[0], 16, misc->pvpanic_port); + ACPI_BUILD_SET_LE(ssdt_ptr, sizeof(ssdp_misc_aml), + ssdt_mctrl_nr_slots[0], 32, nr_mem); + { GArray *sb_scope = build_alloc_array(); uint8_t op = 0x10; /* ScopeOp */ @@ -1084,6 +1098,27 @@ build_ssdt(GArray *table_data, GArray *linker, build_free_array(package); } + if (nr_mem) { + assert(nr_mem <= ACPI_MAX_RAM_SLOTS); + /* build memory devices */ + for (i = 0; i < nr_mem; i++) { + char id[3]; + uint8_t *mem = acpi_data_push(sb_scope, ACPI_MEM_SIZEOF); + + snprintf(id, sizeof(id), "%02X", i); + memcpy(mem, ACPI_MEM_AML, ACPI_MEM_SIZEOF); + memcpy(mem + ACPI_MEM_OFFSET_HEX, id, 2); + memcpy(mem + ACPI_MEM_OFFSET_ID, id, 2); + } + + /* build Method(MEMORY_SLOT_NOTIFY_METHOD, 2) { + * If (LEqual(Arg0, 0x00)) {Notify(MP00, Arg1)} ... 
+ */ + build_append_notify_method(sb_scope, + stringify(MEMORY_SLOT_NOTIFY_METHOD), + "MP%0.02X", nr_mem); + } + { AcpiBuildPciBusHotplugState hotplug_state; Object *pci_host; @@ -1132,15 +1167,22 @@ build_hpet(GArray *table_data, GArray *linker) (void *)hpet, "HPET", sizeof(*hpet), 1); } +typedef enum { + MEM_AFFINITY_NOFLAGS = 0, + MEM_AFFINITY_ENABLED = (1 << 0), + MEM_AFFINITY_HOTPLUGGABLE = (1 << 1), + MEM_AFFINITY_NON_VOLATILE = (1 << 2), +} MemoryAffinityFlags; + static void -acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, - uint64_t base, uint64_t len, int node, int enabled) +acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, + uint64_t len, int node, MemoryAffinityFlags flags) { numamem->type = ACPI_SRAT_MEMORY; numamem->length = sizeof(*numamem); memset(numamem->proximity, 0, 4); numamem->proximity[0] = node; - numamem->flags = cpu_to_le32(!!enabled); + numamem->flags = cpu_to_le32(flags); numamem->base_addr = cpu_to_le64(base); numamem->range_length = cpu_to_le64(len); } @@ -1157,6 +1199,10 @@ build_srat(GArray *table_data, GArray *linker, uint64_t curnode; int srat_start, numa_start, slots; uint64_t mem_len, mem_base, next_base; + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + ram_addr_t hotplugabble_address_space_size = + object_property_get_int(OBJECT(pcms), PC_MACHINE_MEMHP_REGION_SIZE, + NULL); srat_start = table_data->len; @@ -1188,7 +1234,7 @@ build_srat(GArray *table_data, GArray *linker, numa_start = table_data->len; numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1); + acpi_build_srat_memory(numamem, 0, 640*1024, 0, MEM_AFFINITY_ENABLED); next_base = 1024 * 1024; for (i = 1; i < guest_info->numa_nodes + 1; ++i) { mem_base = next_base; @@ -1204,19 +1250,34 @@ build_srat(GArray *table_data, GArray *linker, mem_len -= next_base - guest_info->ram_size_below_4g; if (mem_len > 0) { numamem = acpi_data_push(table_data, sizeof *numamem); - 
acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); + acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, + MEM_AFFINITY_ENABLED); } mem_base = 1ULL << 32; mem_len = next_base - guest_info->ram_size_below_4g; next_base += (1ULL << 32) - guest_info->ram_size_below_4g; } numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, 1); + acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, + MEM_AFFINITY_ENABLED); } slots = (table_data->len - numa_start) / sizeof *numamem; for (; slots < guest_info->numa_nodes + 2; slots++) { numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, 0, 0, 0, 0); + acpi_build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS); + } + + /* + * Entry is required for Windows to enable memory hotplug in OS. + * Memory devices may override proximity set by this entry, + * providing _PXM method if necessary. + */ + if (hotplugabble_address_space_size) { + numamem = acpi_data_push(table_data, sizeof *numamem); + acpi_build_srat_memory(numamem, pcms->hotplug_memory_base, + hotplugabble_address_space_size, 0, + MEM_AFFINITY_HOTPLUGGABLE | + MEM_AFFINITY_ENABLED); } build_header(linker, table_data, diff --git a/hw/i386/acpi-dsdt.dsl b/hw/i386/acpi-dsdt.dsl index 0a1e252d21..3cc0ea0f9a 100644 --- a/hw/i386/acpi-dsdt.dsl +++ b/hw/i386/acpi-dsdt.dsl @@ -306,7 +306,7 @@ DefinitionBlock ( } } -#include "hw/acpi/cpu_hotplug_defs.h" +#include "hw/acpi/pc-hotplug.h" #define CPU_STATUS_BASE PIIX4_CPU_HOTPLUG_IO_BASE #include "acpi-dsdt-cpu-hotplug.dsl" @@ -314,6 +314,7 @@ DefinitionBlock ( /**************************************************************** * General purpose events ****************************************************************/ + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj) Scope(\_GPE) { Name(_HID, "ACPI0006") @@ -330,7 +331,9 @@ DefinitionBlock ( // CPU hotplug event \_SB.PRSC() } - Method(_L03) { + 
Method(_E03) { + // Memory hotplug event + \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD() } Method(_L04) { } diff --git a/hw/i386/acpi-dsdt.hex.generated b/hw/i386/acpi-dsdt.hex.generated index e61572a5dd..ee490e89c3 100644 --- a/hw/i386/acpi-dsdt.hex.generated +++ b/hw/i386/acpi-dsdt.hex.generated @@ -3,12 +3,12 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x53, 0x44, 0x54, -0x80, +0x93, 0x11, 0x0, 0x0, 0x1, -0x60, +0xf5, 0x42, 0x58, 0x50, @@ -4285,8 +4285,8 @@ static unsigned char AcpiDsdtAmlCode[] = { 0xa, 0xb, 0x10, -0x42, -0xc, +0x45, +0xd, 0x5f, 0x47, 0x50, @@ -4389,12 +4389,31 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x53, 0x43, 0x14, -0x6, +0x19, 0x5f, -0x4c, +0x45, 0x30, 0x33, 0x0, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x53, +0x43, +0x4e, 0x14, 0x6, 0x5f, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 3e0ecf140d..67eb45089e 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -58,6 +58,9 @@ #include "hw/boards.h" #include "hw/pci/pci_host.h" #include "acpi-build.h" +#include "hw/mem/pc-dimm.h" +#include "trace.h" +#include "qapi/visitor.h" /* debug PC/ISA interrupts */ //#define DEBUG_IRQ @@ -701,14 +704,14 @@ static FWCfgState *bochs_bios_init(void) unsigned int apic_id = x86_cpu_apic_id_from_index(i); assert(apic_id < apic_id_limit); for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, node_cpumask[j])) { + if (test_bit(i, numa_info[j].node_cpu)) { numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); break; } } } for (i = 0; i < nb_numa_nodes; i++) { - numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]); + numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem); } fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, (1 + apic_id_limit + nb_numa_nodes) * @@ -1119,8 +1122,12 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, guest_info->apic_id_limit = pc_apic_id_limit(max_cpus); guest_info->apic_xrupt_override = kvm_allows_irq0_override(); 
guest_info->numa_nodes = nb_numa_nodes; - guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes * + guest_info->node_mem = g_malloc0(guest_info->numa_nodes * sizeof *guest_info->node_mem); + for (i = 0; i < nb_numa_nodes; i++) { + guest_info->node_mem[i] = numa_info[i].node_mem; + } + guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit * sizeof *guest_info->node_cpu); @@ -1128,7 +1135,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, unsigned int apic_id = x86_cpu_apic_id_from_index(i); assert(apic_id < guest_info->apic_id_limit); for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, node_cpumask[j])) { + if (test_bit(i, numa_info[j].node_cpu)) { guest_info->node_cpu[apic_id] = j; break; } @@ -1183,10 +1190,8 @@ void pc_acpi_init(const char *default_dsdt) } } -FWCfgState *pc_memory_init(MemoryRegion *system_memory, - const char *kernel_filename, - const char *kernel_cmdline, - const char *initrd_filename, +FWCfgState *pc_memory_init(MachineState *machine, + MemoryRegion *system_memory, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, MemoryRegion *rom_memory, @@ -1197,17 +1202,19 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, MemoryRegion *ram, *option_rom_mr; MemoryRegion *ram_below_4g, *ram_above_4g; FWCfgState *fw_cfg; + PCMachineState *pcms = PC_MACHINE(machine); - linux_boot = (kernel_filename != NULL); + assert(machine->ram_size == below_4g_mem_size + above_4g_mem_size); + + linux_boot = (machine->kernel_filename != NULL); /* Allocate RAM. We allocate it as a single memory region and use * aliases to address portions of it, mostly for backwards compatibility * with older qemus that used qemu_ram_alloc(). 
*/ ram = g_malloc(sizeof(*ram)); - memory_region_init_ram(ram, NULL, "pc.ram", - below_4g_mem_size + above_4g_mem_size); - vmstate_register_ram_global(ram); + memory_region_allocate_system_memory(ram, NULL, "pc.ram", + machine->ram_size); *ram_memory = ram; ram_below_4g = g_malloc(sizeof(*ram_below_4g)); memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram, @@ -1223,6 +1230,43 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, e820_add_entry(0x100000000ULL, above_4g_mem_size, E820_RAM); } + if (!guest_info->has_reserved_memory && + (machine->ram_slots || + (machine->maxram_size > machine->ram_size))) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + + error_report("\"-memory 'slots|maxmem'\" is not supported by: %s", + mc->name); + exit(EXIT_FAILURE); + } + + /* initialize hotplug memory address space */ + if (guest_info->has_reserved_memory && + (machine->ram_size < machine->maxram_size)) { + ram_addr_t hotplug_mem_size = + machine->maxram_size - machine->ram_size; + + if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) { + error_report("unsupported amount of memory slots: %"PRIu64, + machine->ram_slots); + exit(EXIT_FAILURE); + } + + pcms->hotplug_memory_base = + ROUND_UP(0x100000000ULL + above_4g_mem_size, 1ULL << 30); + + if ((pcms->hotplug_memory_base + hotplug_mem_size) < + hotplug_mem_size) { + error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT, + machine->maxram_size); + exit(EXIT_FAILURE); + } + + memory_region_init(&pcms->hotplug_memory, OBJECT(pcms), + "hotplug-memory", hotplug_mem_size); + memory_region_add_subregion(system_memory, pcms->hotplug_memory_base, + &pcms->hotplug_memory); + } /* Initialize PC system firmware */ pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw); @@ -1238,8 +1282,15 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, fw_cfg = bochs_bios_init(); rom_set_fw(fw_cfg); + if (guest_info->has_reserved_memory && pcms->hotplug_memory_base) { + uint64_t *val = g_malloc(sizeof(*val)); + 
*val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory_base, 0x1ULL << 30)); + fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); + } + if (linux_boot) { - load_linux(fw_cfg, kernel_filename, initrd_filename, kernel_cmdline, below_4g_mem_size); + load_linux(fw_cfg, machine->kernel_filename, machine->initrd_filename, + machine->kernel_cmdline, below_4g_mem_size); } for (i = 0; i < nb_option_roms; i++) { @@ -1455,3 +1506,178 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name) gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); } } + +static void pc_generic_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + QEMUMachine *qm = data; + + mc->name = qm->name; + mc->alias = qm->alias; + mc->desc = qm->desc; + mc->init = qm->init; + mc->reset = qm->reset; + mc->hot_add_cpu = qm->hot_add_cpu; + mc->kvm_type = qm->kvm_type; + mc->block_default_type = qm->block_default_type; + mc->max_cpus = qm->max_cpus; + mc->no_serial = qm->no_serial; + mc->no_parallel = qm->no_parallel; + mc->use_virtcon = qm->use_virtcon; + mc->use_sclp = qm->use_sclp; + mc->no_floppy = qm->no_floppy; + mc->no_cdrom = qm->no_cdrom; + mc->no_sdcard = qm->no_sdcard; + mc->is_default = qm->is_default; + mc->default_machine_opts = qm->default_machine_opts; + mc->default_boot_order = qm->default_boot_order; + mc->compat_props = qm->compat_props; + mc->hw_version = qm->hw_version; +} + +void qemu_register_pc_machine(QEMUMachine *m) +{ + char *name = g_strconcat(m->name, TYPE_MACHINE_SUFFIX, NULL); + TypeInfo ti = { + .name = name, + .parent = TYPE_PC_MACHINE, + .class_init = pc_generic_machine_class_init, + .class_data = (void *)m, + }; + + type_register(&ti); + g_free(name); +} + +static void pc_dimm_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + int slot; + HotplugHandlerClass *hhc; + Error *local_err = NULL; + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + MachineState *machine = MACHINE(hotplug_dev); + 
PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &local_err); + if (local_err) { + goto out; + } + + addr = pc_dimm_get_free_addr(pcms->hotplug_memory_base, + memory_region_size(&pcms->hotplug_memory), + !addr ? NULL : &addr, + memory_region_size(mr), &local_err); + if (local_err) { + goto out; + } + + object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + goto out; + } + trace_mhp_pc_dimm_assigned_address(addr); + + slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err); + if (local_err) { + goto out; + } + + slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot, + machine->ram_slots, &local_err); + if (local_err) { + goto out; + } + object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err); + if (local_err) { + goto out; + } + trace_mhp_pc_dimm_assigned_slot(slot); + + if (!pcms->acpi_dev) { + error_setg(&local_err, + "memory hotplug is not enabled: missing acpi device"); + goto out; + } + + memory_region_add_subregion(&pcms->hotplug_memory, + addr - pcms->hotplug_memory_base, mr); + vmstate_register_ram(mr, dev); + + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); + hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); +out: + error_propagate(errp, local_err); +} + +static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + pc_dimm_plug(hotplug_dev, dev, errp); + } +} + +static HotplugHandler *pc_get_hotpug_handler(MachineState *machine, + DeviceState *dev) +{ + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + return HOTPLUG_HANDLER(machine); + } + + return pcmc->get_hotplug_handler ? 
+ pcmc->get_hotplug_handler(machine, dev) : NULL; +} + +static void +pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + int64_t value = memory_region_size(&pcms->hotplug_memory); + + visit_type_int(v, &value, name, errp); +} + +static void pc_machine_initfn(Object *obj) +{ + object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int", + pc_machine_get_hotplug_memory_region_size, + NULL, NULL, NULL, NULL); +} + +static void pc_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + PCMachineClass *pcmc = PC_MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + + pcmc->get_hotplug_handler = mc->get_hotplug_handler; + mc->get_hotplug_handler = pc_get_hotpug_handler; + hc->plug = pc_machine_device_plug_cb; +} + +static const TypeInfo pc_machine_info = { + .name = TYPE_PC_MACHINE, + .parent = TYPE_MACHINE, + .abstract = true, + .instance_size = sizeof(PCMachineState), + .instance_init = pc_machine_initfn, + .class_size = sizeof(PCMachineClass), + .class_init = pc_machine_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + }, +}; + +static void pc_machine_register_types(void) +{ + type_register_static(&pc_machine_info); +} + +type_init(pc_machine_register_types) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index a48e26367d..3e7524b961 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -67,12 +67,14 @@ static bool smbios_legacy_mode; * pages in the host. 
*/ static bool gigabyte_align = true; +static bool has_reserved_memory = true; /* PC hardware initialisation */ static void pc_init1(MachineState *machine, int pci_enabled, int kvmclock_enabled) { + PCMachineState *pc_machine = PC_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *system_io = get_system_io(); int i; @@ -143,6 +145,7 @@ static void pc_init1(MachineState *machine, guest_info->has_pci_info = has_pci_info; guest_info->isapc_ram_fw = !pci_enabled; + guest_info->has_reserved_memory = has_reserved_memory; if (smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); @@ -153,11 +156,9 @@ static void pc_init1(MachineState *machine, /* allocate ram and load rom/bios */ if (!xen_enabled()) { - fw_cfg = pc_memory_init(system_memory, - machine->kernel_filename, machine->kernel_cmdline, - machine->initrd_filename, - below_4g_mem_size, above_4g_mem_size, - rom_memory, &ram_memory, guest_info); + fw_cfg = pc_memory_init(machine, system_memory, + below_4g_mem_size, above_4g_mem_size, + rom_memory, &ram_memory, guest_info); } gsi_state = g_malloc0(sizeof(*gsi_state)); @@ -244,14 +245,23 @@ static void pc_init1(MachineState *machine, } if (pci_enabled && acpi_enabled) { + DeviceState *piix4_pm; I2CBus *smbus; smi_irq = qemu_allocate_irqs(pc_acpi_smi_interrupt, first_cpu, 1); /* TODO: Populate SPD eeprom data. 
*/ smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, gsi[9], *smi_irq, - kvm_enabled(), fw_cfg); + kvm_enabled(), fw_cfg, &piix4_pm); smbus_eeprom_init(smbus, 8, NULL, 0); + + object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, + TYPE_HOTPLUG_HANDLER, + (Object **)&pc_machine->acpi_dev, + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); + object_property_set_link(OBJECT(machine), OBJECT(piix4_pm), + PC_MACHINE_ACPI_DEVICE_PROP, &error_abort); } if (pci_enabled) { @@ -267,6 +277,7 @@ static void pc_init_pci(MachineState *machine) static void pc_compat_2_0(MachineState *machine) { smbios_legacy_mode = true; + has_reserved_memory = false; } static void pc_compat_1_7(MachineState *machine) @@ -843,25 +854,25 @@ static QEMUMachine xenfv_machine = { static void pc_machine_init(void) { - qemu_register_machine(&pc_i440fx_machine_v2_1); - qemu_register_machine(&pc_i440fx_machine_v2_0); - qemu_register_machine(&pc_i440fx_machine_v1_7); - qemu_register_machine(&pc_i440fx_machine_v1_6); - qemu_register_machine(&pc_i440fx_machine_v1_5); - qemu_register_machine(&pc_i440fx_machine_v1_4); - qemu_register_machine(&pc_machine_v1_3); - qemu_register_machine(&pc_machine_v1_2); - qemu_register_machine(&pc_machine_v1_1); - qemu_register_machine(&pc_machine_v1_0); - qemu_register_machine(&pc_machine_v0_15); - qemu_register_machine(&pc_machine_v0_14); - qemu_register_machine(&pc_machine_v0_13); - qemu_register_machine(&pc_machine_v0_12); - qemu_register_machine(&pc_machine_v0_11); - qemu_register_machine(&pc_machine_v0_10); - qemu_register_machine(&isapc_machine); + qemu_register_pc_machine(&pc_i440fx_machine_v2_1); + qemu_register_pc_machine(&pc_i440fx_machine_v2_0); + qemu_register_pc_machine(&pc_i440fx_machine_v1_7); + qemu_register_pc_machine(&pc_i440fx_machine_v1_6); + qemu_register_pc_machine(&pc_i440fx_machine_v1_5); + qemu_register_pc_machine(&pc_i440fx_machine_v1_4); + qemu_register_pc_machine(&pc_machine_v1_3); + 
qemu_register_pc_machine(&pc_machine_v1_2); + qemu_register_pc_machine(&pc_machine_v1_1); + qemu_register_pc_machine(&pc_machine_v1_0); + qemu_register_pc_machine(&pc_machine_v0_15); + qemu_register_pc_machine(&pc_machine_v0_14); + qemu_register_pc_machine(&pc_machine_v0_13); + qemu_register_pc_machine(&pc_machine_v0_12); + qemu_register_pc_machine(&pc_machine_v0_11); + qemu_register_pc_machine(&pc_machine_v0_10); + qemu_register_pc_machine(&isapc_machine); #ifdef CONFIG_XEN - qemu_register_machine(&xenfv_machine); + qemu_register_pc_machine(&xenfv_machine); #endif } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index b3c02c163d..aa71332ee1 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -57,10 +57,12 @@ static bool smbios_legacy_mode; * pages in the host. */ static bool gigabyte_align = true; +static bool has_reserved_memory = true; /* PC hardware initialisation */ static void pc_q35_init(MachineState *machine) { + PCMachineState *pc_machine = PC_MACHINE(machine); ram_addr_t below_4g_mem_size, above_4g_mem_size; Q35PCIHost *q35_host; PCIHostState *phb; @@ -130,6 +132,7 @@ static void pc_q35_init(MachineState *machine) guest_info->has_pci_info = has_pci_info; guest_info->isapc_ram_fw = false; guest_info->has_acpi_build = has_acpi_build; + guest_info->has_reserved_memory = has_reserved_memory; if (smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); @@ -140,9 +143,7 @@ static void pc_q35_init(MachineState *machine) /* allocate ram and load rom/bios */ if (!xen_enabled()) { - pc_memory_init(get_system_memory(), - machine->kernel_filename, machine->kernel_cmdline, - machine->initrd_filename, + pc_memory_init(machine, get_system_memory(), below_4g_mem_size, above_4g_mem_size, rom_memory, &ram_memory, guest_info); } @@ -176,6 +177,15 @@ static void pc_q35_init(MachineState *machine) lpc = pci_create_simple_multifunction(host_bus, PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC), true, TYPE_ICH9_LPC_DEVICE); + + object_property_add_link(OBJECT(machine), 
PC_MACHINE_ACPI_DEVICE_PROP, + TYPE_HOTPLUG_HANDLER, + (Object **)&pc_machine->acpi_dev, + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); + object_property_set_link(OBJECT(machine), OBJECT(lpc), + PC_MACHINE_ACPI_DEVICE_PROP, &error_abort); + ich9_lpc = ICH9_LPC_DEVICE(lpc); ich9_lpc->pic = gsi; ich9_lpc->ioapic = gsi_state->ioapic_irq; @@ -245,6 +255,7 @@ static void pc_q35_init(MachineState *machine) static void pc_compat_2_0(MachineState *machine) { smbios_legacy_mode = true; + has_reserved_memory = false; } static void pc_compat_1_7(MachineState *machine) @@ -384,12 +395,12 @@ static QEMUMachine pc_q35_machine_v1_4 = { static void pc_q35_machine_init(void) { - qemu_register_machine(&pc_q35_machine_v2_1); - qemu_register_machine(&pc_q35_machine_v2_0); - qemu_register_machine(&pc_q35_machine_v1_7); - qemu_register_machine(&pc_q35_machine_v1_6); - qemu_register_machine(&pc_q35_machine_v1_5); - qemu_register_machine(&pc_q35_machine_v1_4); + qemu_register_pc_machine(&pc_q35_machine_v2_1); + qemu_register_pc_machine(&pc_q35_machine_v2_0); + qemu_register_pc_machine(&pc_q35_machine_v1_7); + qemu_register_pc_machine(&pc_q35_machine_v1_6); + qemu_register_pc_machine(&pc_q35_machine_v1_5); + qemu_register_pc_machine(&pc_q35_machine_v1_4); } machine_init(pc_q35_machine_init); diff --git a/hw/i386/q35-acpi-dsdt.dsl b/hw/i386/q35-acpi-dsdt.dsl index f4d2a2daee..8c3eae73bf 100644 --- a/hw/i386/q35-acpi-dsdt.dsl +++ b/hw/i386/q35-acpi-dsdt.dsl @@ -402,7 +402,7 @@ DefinitionBlock ( define_gsi_link(GSIH, 0, 0x17) } -#include "hw/acpi/cpu_hotplug_defs.h" +#include "hw/acpi/pc-hotplug.h" #define CPU_STATUS_BASE ICH9_CPU_HOTPLUG_IO_BASE #include "acpi-dsdt-cpu-hotplug.dsl" @@ -410,6 +410,7 @@ DefinitionBlock ( /**************************************************************** * General purpose events ****************************************************************/ + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj) 
Scope(\_GPE) { Name(_HID, "ACPI0006") @@ -422,7 +423,9 @@ DefinitionBlock ( // CPU hotplug event \_SB.PRSC() } - Method(_L03) { + Method(_E03) { + // Memory hotplug event + \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD() } Method(_L04) { } diff --git a/hw/i386/q35-acpi-dsdt.hex.generated b/hw/i386/q35-acpi-dsdt.hex.generated index 6b788c9be0..c9eb4ac6ad 100644 --- a/hw/i386/q35-acpi-dsdt.hex.generated +++ b/hw/i386/q35-acpi-dsdt.hex.generated @@ -3,12 +3,12 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0x53, 0x44, 0x54, -0xd2, +0xe5, 0x1c, 0x0, 0x0, 0x1, -0x13, +0xb7, 0x42, 0x58, 0x50, @@ -7234,8 +7234,8 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0xa, 0xb, 0x10, -0x4f, -0x8, +0x42, +0xa, 0x5f, 0x47, 0x50, @@ -7287,12 +7287,31 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0x53, 0x43, 0x14, -0x6, +0x19, 0x5f, -0x4c, +0x45, 0x30, 0x33, 0x0, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x53, +0x43, +0x4e, 0x14, 0x6, 0x5f, diff --git a/hw/i386/ssdt-mem.dsl b/hw/i386/ssdt-mem.dsl new file mode 100644 index 0000000000..8e17bd1f97 --- /dev/null +++ b/hw/i386/ssdt-mem.dsl @@ -0,0 +1,77 @@ +/* + * Memory hotplug ACPI DSDT static objects definitions + * + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2013-2014 Red Hat Inc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +/* This file is the basis for the ssdt_mem[] variable in src/acpi.c. + * It defines the contents of the memory device object. At + * runtime, a dynamically generated SSDT will contain one copy of this + * AML snippet for every possible memory device in the system. The + * objects will be placed in the \_SB_ namespace. + * + * In addition to the aml code generated from this file, the + * src/acpi.c file creates a MTFY method with an entry for each memdevice: + * Method(MTFY, 2) { + * If (LEqual(Arg0, 0x00)) { Notify(MP00, Arg1) } + * If (LEqual(Arg0, 0x01)) { Notify(MP01, Arg1) } + * ... + * } + */ +#include "hw/acpi/pc-hotplug.h" + +ACPI_EXTRACT_ALL_CODE ssdm_mem_aml + +DefinitionBlock ("ssdt-mem.aml", "SSDT", 0x02, "BXPC", "CSSDT", 0x1) +{ + + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD, MethodObj) + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD, MethodObj) + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD, MethodObj) + External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD, MethodObj) + + Scope(\_SB) { +/* v------------------ DO NOT EDIT ------------------v */ + ACPI_EXTRACT_DEVICE_START ssdt_mem_start + ACPI_EXTRACT_DEVICE_END ssdt_mem_end + ACPI_EXTRACT_DEVICE_STRING ssdt_mem_name + Device(MPAA) { + ACPI_EXTRACT_NAME_STRING ssdt_mem_id + Name(_UID, "0xAA") +/* ^------------------ DO NOT EDIT ------------------^ + * Don't change the above without also updating the C code. 
+ */ + Name(_HID, EISAID("PNP0C80")) + + Method(_CRS, 0) { + Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD(_UID)) + } + + Method(_STA, 0) { + Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD(_UID)) + } + + Method(_PXM, 0) { + Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD(_UID)) + } + + Method(_OST, 3) { + \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD(_UID, Arg0, Arg1, Arg2) + } + } + } +} diff --git a/hw/i386/ssdt-mem.hex.generated b/hw/i386/ssdt-mem.hex.generated new file mode 100644 index 0000000000..00bd34d269 --- /dev/null +++ b/hw/i386/ssdt-mem.hex.generated @@ -0,0 +1,213 @@ +static unsigned char ssdt_mem_id[] = { +0x35 +}; +static unsigned char ssdm_mem_aml[] = { +0x53, +0x53, +0x44, +0x54, +0xc7, +0x0, +0x0, +0x0, +0x2, +0x71, +0x42, +0x58, +0x50, +0x43, +0x0, +0x0, +0x43, +0x53, +0x53, +0x44, +0x54, +0x0, +0x0, +0x0, +0x1, +0x0, +0x0, +0x0, +0x49, +0x4e, +0x54, +0x4c, +0x15, +0x11, +0x13, +0x20, +0x10, +0x42, +0xa, +0x5c, +0x5f, +0x53, +0x42, +0x5f, +0x5b, +0x82, +0x49, +0x9, +0x4d, +0x50, +0x41, +0x41, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xd, +0x30, +0x78, +0x41, +0x41, +0x0, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0x80, +0x14, +0x1e, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x43, +0x52, +0x53, +0x5f, +0x55, +0x49, +0x44, +0x14, +0x1e, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x52, +0x53, +0x54, +0x5f, +0x55, +0x49, +0x44, +0x14, +0x1e, +0x5f, +0x50, +0x58, +0x4d, +0x0, +0xa4, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x50, +0x58, +0x4d, +0x5f, +0x55, +0x49, +0x44, +0x14, +0x20, +0x5f, +0x4f, +0x53, +0x54, +0x3, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, 
+0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x4f, +0x53, +0x54, +0x5f, +0x55, +0x49, +0x44, +0x68, +0x69, +0x6a +}; +static unsigned char ssdt_mem_start[] = { +0x2c +}; +static unsigned char ssdt_mem_end[] = { +0xc7 +}; +static unsigned char ssdt_mem_name[] = { +0x30 +}; diff --git a/hw/i386/ssdt-misc.dsl b/hw/i386/ssdt-misc.dsl index a4484b8176..d329b8ba57 100644 --- a/hw/i386/ssdt-misc.dsl +++ b/hw/i386/ssdt-misc.dsl @@ -12,6 +12,7 @@ * You should have received a copy of the GNU General Public License along * with this program; if not, see <http://www.gnu.org/licenses/>. */ +#include "hw/acpi/pc-hotplug.h" ACPI_EXTRACT_ALL_CODE ssdp_misc_aml @@ -116,4 +117,167 @@ DefinitionBlock ("ssdt-misc.aml", "SSDT", 0x01, "BXPC", "BXSSDTSUSP", 0x1) } } } + + External(MEMORY_SLOT_NOTIFY_METHOD, MethodObj) + Scope(\_SB.PCI0) { + Device(MEMORY_HOPTLUG_DEVICE) { + Name(_HID, "PNP0A06") + Name(_UID, "Memory hotplug resources") + + ACPI_EXTRACT_NAME_DWORD_CONST ssdt_mctrl_nr_slots + Name(MEMORY_SLOTS_NUMBER, 0x12345678) + + /* Memory hotplug IO registers */ + OperationRegion(MEMORY_HOTPLUG_IO_REGION, SystemIO, + ACPI_MEMORY_HOTPLUG_BASE, + ACPI_MEMORY_HOTPLUG_IO_LEN) + + Name(_CRS, ResourceTemplate() { + IO(Decode16, ACPI_MEMORY_HOTPLUG_BASE, ACPI_MEMORY_HOTPLUG_BASE, + 0, ACPI_MEMORY_HOTPLUG_IO_LEN, IO) + }) + + Method(_STA, 0) { + If (LEqual(MEMORY_SLOTS_NUMBER, Zero)) { + Return(0x0) + } + /* present, functioning, decoding, not shown in UI */ + Return(0xB) + } + + Field(MEMORY_HOTPLUG_IO_REGION, DWordAcc, NoLock, Preserve) { + MEMORY_SLOT_ADDR_LOW, 32, // read only + MEMORY_SLOT_ADDR_HIGH, 32, // read only + MEMORY_SLOT_SIZE_LOW, 32, // read only + MEMORY_SLOT_SIZE_HIGH, 32, // read only + MEMORY_SLOT_PROXIMITY, 32, // read only + } + Field(MEMORY_HOTPLUG_IO_REGION, ByteAcc, NoLock, Preserve) { + Offset(20), + MEMORY_SLOT_ENABLED, 1, // 1 if enabled, read only + MEMORY_SLOT_INSERT_EVENT, 1, // (read) 1 if has a insert event. 
(write) 1 to clear event + } + + Mutex (MEMORY_SLOT_LOCK, 0) + Field (MEMORY_HOTPLUG_IO_REGION, DWordAcc, NoLock, Preserve) { + MEMORY_SLOT_SLECTOR, 32, // DIMM selector, write only + MEMORY_SLOT_OST_EVENT, 32, // _OST event code, write only + MEMORY_SLOT_OST_STATUS, 32, // _OST status code, write only + } + + Method(MEMORY_SLOT_SCAN_METHOD, 0) { + If (LEqual(MEMORY_SLOTS_NUMBER, Zero)) { + Return(Zero) + } + + Store(Zero, Local0) // Mem devs iterrator + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + while (LLess(Local0, MEMORY_SLOTS_NUMBER)) { + Store(Local0, MEMORY_SLOT_SLECTOR) // select Local0 DIMM + If (LEqual(MEMORY_SLOT_INSERT_EVENT, One)) { // Memory device needs check + MEMORY_SLOT_NOTIFY_METHOD(Local0, 1) + Store(1, MEMORY_SLOT_INSERT_EVENT) + } + // TODO: handle memory eject request + Add(Local0, One, Local0) // goto next DIMM + } + Release(MEMORY_SLOT_LOCK) + Return(One) + } + + Method(MEMORY_SLOT_STATUS_METHOD, 1) { + Store(Zero, Local0) + + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + + If (LEqual(MEMORY_SLOT_ENABLED, One)) { + Store(0xF, Local0) + } + + Release(MEMORY_SLOT_LOCK) + Return(Local0) + } + + Method(MEMORY_SLOT_CRS_METHOD, 1, Serialized) { + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + + Name(MR64, ResourceTemplate() { + QWordMemory(ResourceProducer, PosDecode, MinFixed, MaxFixed, + Cacheable, ReadWrite, + 0x0000000000000000, // Address Space Granularity + 0x0000000000000000, // Address Range Minimum + 0xFFFFFFFFFFFFFFFE, // Address Range Maximum + 0x0000000000000000, // Address Translation Offset + 0xFFFFFFFFFFFFFFFF, // Address Length + ,, MW64, AddressRangeMemory, TypeStatic) + }) + + CreateDWordField(MR64, 14, MINL) + CreateDWordField(MR64, 18, MINH) + CreateDWordField(MR64, 38, LENL) + CreateDWordField(MR64, 42, LENH) + CreateDWordField(MR64, 22, MAXL) + CreateDWordField(MR64, 26, MAXH) + + Store(MEMORY_SLOT_ADDR_HIGH, MINH) + 
Store(MEMORY_SLOT_ADDR_LOW, MINL) + Store(MEMORY_SLOT_SIZE_HIGH, LENH) + Store(MEMORY_SLOT_SIZE_LOW, LENL) + + // 64-bit math: MAX = MIN + LEN - 1 + Add(MINL, LENL, MAXL) + Add(MINH, LENH, MAXH) + If (LLess(MAXL, MINL)) { + Add(MAXH, One, MAXH) + } + If (LLess(MAXL, One)) { + Subtract(MAXH, One, MAXH) + } + Subtract(MAXL, One, MAXL) + + If (LEqual(MAXH, Zero)){ + Name(MR32, ResourceTemplate() { + DWordMemory(ResourceProducer, PosDecode, MinFixed, MaxFixed, + Cacheable, ReadWrite, + 0x00000000, // Address Space Granularity + 0x00000000, // Address Range Minimum + 0xFFFFFFFE, // Address Range Maximum + 0x00000000, // Address Translation Offset + 0xFFFFFFFF, // Address Length + ,, MW32, AddressRangeMemory, TypeStatic) + }) + CreateDWordField(MR32, MW32._MIN, MIN) + CreateDWordField(MR32, MW32._MAX, MAX) + CreateDWordField(MR32, MW32._LEN, LEN) + Store(MINL, MIN) + Store(MAXL, MAX) + Store(LENL, LEN) + + Release(MEMORY_SLOT_LOCK) + Return(MR32) + } + + Release(MEMORY_SLOT_LOCK) + Return(MR64) + } + + Method(MEMORY_SLOT_PROXIMITY_METHOD, 1) { + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + Store(MEMORY_SLOT_PROXIMITY, Local0) + Release(MEMORY_SLOT_LOCK) + Return(Local0) + } + + Method(MEMORY_SLOT_OST_METHOD, 4) { + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + Store(Arg1, MEMORY_SLOT_OST_EVENT) + Store(Arg2, MEMORY_SLOT_OST_STATUS) + Release(MEMORY_SLOT_LOCK) + } + } // Device() + } // Scope() } diff --git a/hw/i386/ssdt-misc.hex.generated b/hw/i386/ssdt-misc.hex.generated index 55e3bd2aa6..ba4268a60b 100644 --- a/hw/i386/ssdt-misc.hex.generated +++ b/hw/i386/ssdt-misc.hex.generated @@ -4,6 +4,9 @@ static unsigned char acpi_pci64_length[] = { static unsigned char acpi_s4_pkg[] = { 0x8f }; +static unsigned short ssdt_mctrl_nr_slots[] = { +0x1aa +}; static unsigned char acpi_s3_name[] = { 0x7c }; @@ -18,12 +21,12 @@ static unsigned char ssdp_misc_aml[] = { 0x53, 0x44, 
0x54, -0x62, -0x1, +0x7e, +0x4, 0x0, 0x0, 0x1, -0x76, +0x8b, 0x42, 0x58, 0x50, @@ -46,8 +49,8 @@ static unsigned char ssdp_misc_aml[] = { 0x4e, 0x54, 0x4c, -0x23, -0x8, +0x15, +0x11, 0x13, 0x20, 0x10, @@ -367,7 +370,803 @@ static unsigned char ssdp_misc_aml[] = { 0x49, 0x4f, 0x4d, -0x58 +0x58, +0x10, +0x4b, +0x31, +0x5c, +0x2e, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x5b, +0x82, +0x4d, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xd, +0x50, +0x4e, +0x50, +0x30, +0x41, +0x30, +0x36, +0x0, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xd, +0x4d, +0x65, +0x6d, +0x6f, +0x72, +0x79, +0x20, +0x68, +0x6f, +0x74, +0x70, +0x6c, +0x75, +0x67, +0x20, +0x72, +0x65, +0x73, +0x6f, +0x75, +0x72, +0x63, +0x65, +0x73, +0x0, +0x8, +0x4d, +0x44, +0x4e, +0x52, +0xc, +0x78, +0x56, +0x34, +0x12, +0x5b, +0x80, +0x48, +0x50, +0x4d, +0x52, +0x1, +0xb, +0x0, +0xa, +0xa, +0x18, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0xd, +0xa, +0xa, +0x47, +0x1, +0x0, +0xa, +0x0, +0xa, +0x0, +0x18, +0x79, +0x0, +0x14, +0x13, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa0, +0x9, +0x93, +0x4d, +0x44, +0x4e, +0x52, +0x0, +0xa4, +0x0, +0xa4, +0xa, +0xb, +0x5b, +0x81, +0x1f, +0x48, +0x50, +0x4d, +0x52, +0x3, +0x4d, +0x52, +0x42, +0x4c, +0x20, +0x4d, +0x52, +0x42, +0x48, +0x20, +0x4d, +0x52, +0x4c, +0x4c, +0x20, +0x4d, +0x52, +0x4c, +0x48, +0x20, +0x4d, +0x50, +0x58, +0x5f, +0x20, +0x5b, +0x81, +0x13, +0x48, +0x50, +0x4d, +0x52, +0x1, +0x0, +0x40, +0xa, +0x4d, +0x45, +0x53, +0x5f, +0x1, +0x4d, +0x49, +0x4e, +0x53, +0x1, +0x5b, +0x1, +0x4d, +0x4c, +0x43, +0x4b, +0x0, +0x5b, +0x81, +0x15, +0x48, +0x50, +0x4d, +0x52, +0x3, +0x4d, +0x53, +0x45, +0x4c, +0x20, +0x4d, +0x4f, +0x45, +0x56, +0x20, +0x4d, +0x4f, +0x53, +0x43, +0x20, +0x14, +0x4a, +0x4, +0x4d, +0x53, +0x43, +0x4e, +0x0, +0xa0, +0x9, +0x93, +0x4d, +0x44, +0x4e, +0x52, +0x0, +0xa4, +0x0, +0x70, +0x0, +0x60, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0xa2, +0x25, +0x95, +0x60, +0x4d, +0x44, +0x4e, +0x52, +0x70, +0x60, 
+0x4d, +0x53, +0x45, +0x4c, +0xa0, +0x13, +0x93, +0x4d, +0x49, +0x4e, +0x53, +0x1, +0x4d, +0x54, +0x46, +0x59, +0x60, +0x1, +0x70, +0x1, +0x4d, +0x49, +0x4e, +0x53, +0x72, +0x60, +0x1, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x1, +0x14, +0x2d, +0x4d, +0x52, +0x53, +0x54, +0x1, +0x70, +0x0, +0x60, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0xa0, +0xb, +0x93, +0x4d, +0x45, +0x53, +0x5f, +0x1, +0x70, +0xa, +0xf, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x60, +0x14, +0x41, +0x18, +0x4d, +0x43, +0x52, +0x53, +0x9, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x8, +0x4d, +0x52, +0x36, +0x34, +0x11, +0x33, +0xa, +0x30, +0x8a, +0x2b, +0x0, +0x0, +0xc, +0x3, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xfe, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0x79, +0x0, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0xe, +0x4d, +0x49, +0x4e, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x12, +0x4d, +0x49, +0x4e, +0x48, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x26, +0x4c, +0x45, +0x4e, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x2a, +0x4c, +0x45, +0x4e, +0x48, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x16, +0x4d, +0x41, +0x58, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x1a, +0x4d, +0x41, +0x58, +0x48, +0x70, +0x4d, +0x52, +0x42, +0x48, +0x4d, +0x49, +0x4e, +0x48, +0x70, +0x4d, +0x52, +0x42, +0x4c, +0x4d, +0x49, +0x4e, +0x4c, +0x70, +0x4d, +0x52, +0x4c, +0x48, +0x4c, +0x45, +0x4e, +0x48, +0x70, +0x4d, +0x52, +0x4c, +0x4c, +0x4c, +0x45, +0x4e, +0x4c, +0x72, +0x4d, +0x49, +0x4e, +0x4c, +0x4c, +0x45, +0x4e, +0x4c, +0x4d, +0x41, +0x58, +0x4c, +0x72, +0x4d, +0x49, +0x4e, +0x48, +0x4c, +0x45, +0x4e, +0x48, +0x4d, +0x41, +0x58, +0x48, +0xa0, +0x14, 
+0x95, +0x4d, +0x41, +0x58, +0x4c, +0x4d, +0x49, +0x4e, +0x4c, +0x72, +0x4d, +0x41, +0x58, +0x48, +0x1, +0x4d, +0x41, +0x58, +0x48, +0xa0, +0x11, +0x95, +0x4d, +0x41, +0x58, +0x4c, +0x1, +0x74, +0x4d, +0x41, +0x58, +0x48, +0x1, +0x4d, +0x41, +0x58, +0x48, +0x74, +0x4d, +0x41, +0x58, +0x4c, +0x1, +0x4d, +0x41, +0x58, +0x4c, +0xa0, +0x44, +0x7, +0x93, +0x4d, +0x41, +0x58, +0x48, +0x0, +0x8, +0x4d, +0x52, +0x33, +0x32, +0x11, +0x1f, +0xa, +0x1c, +0x87, +0x17, +0x0, +0x0, +0xc, +0x3, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xfe, +0xff, +0xff, +0xff, +0x0, +0x0, +0x0, +0x0, +0xff, +0xff, +0xff, +0xff, +0x79, +0x0, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0xa, +0x4d, +0x49, +0x4e, +0x5f, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0xe, +0x4d, +0x41, +0x58, +0x5f, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0x16, +0x4c, +0x45, +0x4e, +0x5f, +0x70, +0x4d, +0x49, +0x4e, +0x4c, +0x4d, +0x49, +0x4e, +0x5f, +0x70, +0x4d, +0x41, +0x58, +0x4c, +0x4d, +0x41, +0x58, +0x5f, +0x70, +0x4c, +0x45, +0x4e, +0x4c, +0x4c, +0x45, +0x4e, +0x5f, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x4d, +0x52, +0x33, +0x32, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x4d, +0x52, +0x36, +0x34, +0x14, +0x24, +0x4d, +0x50, +0x58, +0x4d, +0x1, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x70, +0x4d, +0x50, +0x58, +0x5f, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x60, +0x14, +0x28, +0x4d, +0x4f, +0x53, +0x54, +0x4, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x70, +0x69, +0x4d, +0x4f, +0x45, +0x56, +0x70, +0x6a, +0x4d, +0x4f, +0x53, +0x43, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b }; static unsigned char ssdt_isa_pest[] = { 0xd0 diff --git a/hw/i386/ssdt-pcihp.hex.generated b/hw/i386/ssdt-pcihp.hex.generated index b599b4663c..72ffa84800 100644 --- a/hw/i386/ssdt-pcihp.hex.generated +++ b/hw/i386/ssdt-pcihp.hex.generated @@ -32,7 +32,7 @@ 
static unsigned char ssdp_pcihp_aml[] = { 0x0, 0x0, 0x1, -0x6b, +0x70, 0x42, 0x58, 0x50, @@ -55,8 +55,8 @@ static unsigned char ssdp_pcihp_aml[] = { 0x4e, 0x54, 0x4c, -0x23, -0x8, +0x15, +0x11, 0x13, 0x20, 0x10, diff --git a/hw/i386/ssdt-proc.hex.generated b/hw/i386/ssdt-proc.hex.generated index 97e28d4820..4df0734c79 100644 --- a/hw/i386/ssdt-proc.hex.generated +++ b/hw/i386/ssdt-proc.hex.generated @@ -11,7 +11,7 @@ static unsigned char ssdp_proc_aml[] = { 0x0, 0x0, 0x1, -0x78, +0x7d, 0x42, 0x58, 0x50, @@ -34,8 +34,8 @@ static unsigned char ssdp_proc_aml[] = { 0x4e, 0x54, 0x4c, -0x23, -0x8, +0x15, +0x11, 0x13, 0x20, 0x5b, diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 75d9c6e41e..1a7af450a7 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -211,7 +211,7 @@ static uint32_t nvic_readl(nvic_state *s, uint32_t offset) cpu = ARM_CPU(current_cpu); return cpu->env.v7m.vecbase; case 0xd0c: /* Application Interrupt/Reset Control. */ - return 0xfa05000; + return 0xfa050000; case 0xd10: /* System Control. */ /* TODO: Implement SLEEPONEXIT. */ return 0; @@ -346,6 +346,9 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) if (value & 5) { qemu_log_mask(LOG_UNIMP, "AIRCR system reset unimplemented\n"); } + if (value & 0x700) { + qemu_log_mask(LOG_UNIMP, "PRIGROUP unimplemented\n"); + } } break; case 0xd10: /* System Control. 
*/ diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 97f69d6001..b846d81990 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -563,7 +563,14 @@ static void ich9_lpc_add_properties(ICH9LPCState *lpc) ich9_pm_add_properties(OBJECT(lpc), &lpc->pm, NULL); } -static int ich9_lpc_initfn(PCIDevice *d) +static void ich9_lpc_initfn(Object *obj) +{ + ICH9LPCState *lpc = ICH9_LPC_DEVICE(obj); + + ich9_lpc_add_properties(lpc); +} + +static int ich9_lpc_init(PCIDevice *d) { ICH9LPCState *lpc = ICH9_LPC_DEVICE(d); ISABus *isa_bus; @@ -589,10 +596,22 @@ static int ich9_lpc_initfn(PCIDevice *d) memory_region_add_subregion_overlap(pci_address_space_io(d), ICH9_RST_CNT_IOPORT, &lpc->rst_cnt_mem, 1); + return 0; +} - ich9_lpc_add_properties(lpc); +static void ich9_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev); - return 0; + ich9_pm_device_plug_cb(&lpc->pm, dev, errp); +} + +static void ich9_device_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + error_setg(errp, "acpi: device unplug request for not supported device" + " type: %s", object_get_typename(OBJECT(dev))); } static bool ich9_rst_cnt_needed(void *opaque) @@ -638,10 +657,12 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(klass); set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->reset = ich9_lpc_reset; - k->init = ich9_lpc_initfn; + k->init = ich9_lpc_init; dc->vmsd = &vmstate_ich9_lpc; k->config_write = ich9_lpc_config_write; dc->desc = "ICH9 LPC bridge"; @@ -654,13 +675,22 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) * pc_q35_init() */ dc->cannot_instantiate_with_device_add_yet = true; + hc->plug = ich9_device_plug_cb; + hc->unplug = ich9_device_unplug_cb; + 
adevc->ospm_status = ich9_pm_ospm_status; } static const TypeInfo ich9_lpc_info = { .name = TYPE_ICH9_LPC_DEVICE, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(struct ICH9LPCState), + .instance_init = ich9_lpc_initfn, .class_init = ich9_lpc_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { TYPE_ACPI_DEVICE_IF }, + { } + } }; static void ich9_lpc_register(void) diff --git a/hw/mem/Makefile.objs b/hw/mem/Makefile.objs new file mode 100644 index 0000000000..b000fb42bf --- /dev/null +++ b/hw/mem/Makefile.objs @@ -0,0 +1 @@ +common-obj-$(CONFIG_MEM_HOTPLUG) += pc-dimm.o diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c new file mode 100644 index 0000000000..ad176b700b --- /dev/null +++ b/hw/mem/pc-dimm.c @@ -0,0 +1,281 @@ +/* + * Dimm device for Memory Hotplug + * + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2014 Red Hat Inc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "hw/mem/pc-dimm.h" +#include "qemu/config-file.h" +#include "qapi/visitor.h" +#include "qemu/range.h" + +int qmp_pc_dimm_device_list(Object *obj, void *opaque) +{ + MemoryDeviceInfoList ***prev = opaque; + + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { + DeviceState *dev = DEVICE(obj); + + if (dev->realized) { + MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1); + MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); + PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); + DeviceClass *dc = DEVICE_GET_CLASS(obj); + PCDIMMDevice *dimm = PC_DIMM(obj); + + if (dev->id) { + di->has_id = true; + di->id = g_strdup(dev->id); + } + di->hotplugged = dev->hotplugged; + di->hotpluggable = dc->hotpluggable; + di->addr = dimm->addr; + di->slot = dimm->slot; + di->node = dimm->node; + di->size = object_property_get_int(OBJECT(dimm), PC_DIMM_SIZE_PROP, + NULL); + di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); + + info->dimm = di; + elem->value = info; + elem->next = NULL; + **prev = elem; + *prev = &elem->next; + } + } + + object_child_foreach(obj, qmp_pc_dimm_device_list, opaque); + return 0; +} + +static int pc_dimm_slot2bitmap(Object *obj, void *opaque) +{ + unsigned long *bitmap = opaque; + + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { + DeviceState *dev = DEVICE(obj); + if (dev->realized) { /* count only realized DIMMs */ + PCDIMMDevice *d = PC_DIMM(obj); + set_bit(d->slot, bitmap); + } + } + + object_child_foreach(obj, pc_dimm_slot2bitmap, opaque); + return 0; +} + +int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp) +{ + unsigned long *bitmap = bitmap_new(max_slots); + int slot = 0; + + object_child_foreach(qdev_get_machine(), pc_dimm_slot2bitmap, bitmap); + + /* check if requested slot is not occupied */ + if (hint) { + if (*hint >= max_slots) { + 
error_setg(errp, "invalid slot# %d, should be less than %d", + *hint, max_slots); + } else if (!test_bit(*hint, bitmap)) { + slot = *hint; + } else { + error_setg(errp, "slot %d is busy", *hint); + } + goto out; + } + + /* search for free slot */ + slot = find_first_zero_bit(bitmap, max_slots); + if (slot == max_slots) { + error_setg(errp, "no free slots available"); + } +out: + g_free(bitmap); + return slot; +} + +static gint pc_dimm_addr_sort(gconstpointer a, gconstpointer b) +{ + PCDIMMDevice *x = PC_DIMM(a); + PCDIMMDevice *y = PC_DIMM(b); + Int128 diff = int128_sub(int128_make64(x->addr), int128_make64(y->addr)); + + if (int128_lt(diff, int128_zero())) { + return -1; + } else if (int128_gt(diff, int128_zero())) { + return 1; + } + return 0; +} + +static int pc_dimm_built_list(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { + DeviceState *dev = DEVICE(obj); + if (dev->realized) { /* only realized DIMMs matter */ + *list = g_slist_insert_sorted(*list, dev, pc_dimm_addr_sort); + } + } + + object_child_foreach(obj, pc_dimm_built_list, opaque); + return 0; +} + +uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, + uint64_t address_space_size, + uint64_t *hint, uint64_t size, + Error **errp) +{ + GSList *list = NULL, *item; + uint64_t new_addr, ret = 0; + uint64_t address_space_end = address_space_start + address_space_size; + + assert(address_space_end > address_space_size); + object_child_foreach(qdev_get_machine(), pc_dimm_built_list, &list); + + if (hint) { + new_addr = *hint; + } else { + new_addr = address_space_start; + } + + /* find address range that will fit new DIMM */ + for (item = list; item; item = g_slist_next(item)) { + PCDIMMDevice *dimm = item->data; + uint64_t dimm_size = object_property_get_int(OBJECT(dimm), + PC_DIMM_SIZE_PROP, + errp); + if (errp && *errp) { + goto out; + } + + if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) { + if (hint) { + DeviceState *d = 
DEVICE(dimm); + error_setg(errp, "address range conflicts with '%s'", d->id); + goto out; + } + new_addr = dimm->addr + dimm_size; + } + } + ret = new_addr; + + if (new_addr < address_space_start) { + error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64 + "] at 0x%" PRIx64, new_addr, size, address_space_start); + } else if ((new_addr + size) > address_space_end) { + error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64 + "] beyond 0x%" PRIx64, new_addr, size, address_space_end); + } + +out: + g_slist_free(list); + return ret; +} + +static Property pc_dimm_properties[] = { + DEFINE_PROP_UINT64(PC_DIMM_ADDR_PROP, PCDIMMDevice, addr, 0), + DEFINE_PROP_UINT32(PC_DIMM_NODE_PROP, PCDIMMDevice, node, 0), + DEFINE_PROP_INT32(PC_DIMM_SLOT_PROP, PCDIMMDevice, slot, + PC_DIMM_UNASSIGNED_SLOT), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pc_dimm_get_size(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + int64_t value; + MemoryRegion *mr; + PCDIMMDevice *dimm = PC_DIMM(obj); + + mr = host_memory_backend_get_memory(dimm->hostmem, errp); + value = memory_region_size(mr); + + visit_type_int(v, &value, name, errp); +} + +static void pc_dimm_check_memdev_is_busy(Object *obj, const char *name, + Object *val, Error **errp) +{ + MemoryRegion *mr; + + mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), errp); + if (memory_region_is_mapped(mr)) { + char *path = object_get_canonical_path_component(val); + error_setg(errp, "can't use already busy memdev: %s", path); + g_free(path); + } else { + qdev_prop_allow_set_link_before_realize(obj, name, val, errp); + } +} + +static void pc_dimm_init(Object *obj) +{ + PCDIMMDevice *dimm = PC_DIMM(obj); + + object_property_add(obj, PC_DIMM_SIZE_PROP, "int", pc_dimm_get_size, + NULL, NULL, NULL, &error_abort); + object_property_add_link(obj, PC_DIMM_MEMDEV_PROP, TYPE_MEMORY_BACKEND, + (Object **)&dimm->hostmem, + pc_dimm_check_memdev_is_busy, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); +} + 
+static void pc_dimm_realize(DeviceState *dev, Error **errp) +{ + PCDIMMDevice *dimm = PC_DIMM(dev); + + if (!dimm->hostmem) { + error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set"); + return; + } +} + +static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm) +{ + return host_memory_backend_get_memory(dimm->hostmem, &error_abort); +} + +static void pc_dimm_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc); + + dc->realize = pc_dimm_realize; + dc->props = pc_dimm_properties; + + ddc->get_memory_region = pc_dimm_get_memory_region; +} + +static TypeInfo pc_dimm_info = { + .name = TYPE_PC_DIMM, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PCDIMMDevice), + .instance_init = pc_dimm_init, + .class_init = pc_dimm_class_init, + .class_size = sizeof(PCDIMMDeviceClass), +}; + +static void pc_dimm_register_types(void) +{ + type_register_static(&pc_dimm_info); +} + +type_init(pc_dimm_register_types) diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c index 91b0ce5661..2868ee5b03 100644 --- a/hw/mips/mips_malta.c +++ b/hw/mips/mips_malta.c @@ -1143,7 +1143,7 @@ void mips_malta_init(MachineState *machine) pci_piix4_ide_init(pci_bus, hd, piix4_devfn + 1); pci_create_simple(pci_bus, piix4_devfn + 2, "piix4-usb-uhci"); smbus = piix4_pm_init(pci_bus, piix4_devfn + 3, 0x1100, - isa_get_irq(NULL, 9), NULL, 0, NULL); + isa_get_irq(NULL, 9), NULL, 0, NULL, NULL); smbus_eeprom_init(smbus, 8, smbus_eeprom_buf, smbus_eeprom_size); g_free(smbus_eeprom_buf); pit = pit_init(isa_bus, 0x40, 0, NULL); diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index a1de2f43a0..7ac7c21bdb 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -15,6 +15,7 @@ #include "net/net.h" #include "net/tap.h" +#include "net/vhost-user.h" #include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" @@ -27,7 +28,6 @@ #include <sys/socket.h> #include <linux/kvm.h> #include <fcntl.h> -#include 
<sys/ioctl.h> #include <linux/virtio_ring.h> #include <netpacket/packet.h> #include <net/ethernet.h> @@ -46,39 +46,76 @@ struct vhost_net { NetClientState *nc; }; -unsigned vhost_net_get_features(struct vhost_net *net, unsigned features) +/* Features supported by host kernel. */ +static const int kernel_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_NET_F_MRG_RXBUF, + VHOST_INVALID_FEATURE_BIT +}; + +/* Features supported by others. */ +const int user_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + + VIRTIO_F_ANY_LAYOUT, + VIRTIO_NET_F_CSUM, + VIRTIO_NET_F_GUEST_CSUM, + VIRTIO_NET_F_GSO, + VIRTIO_NET_F_GUEST_TSO4, + VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, + VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_HOST_TSO4, + VIRTIO_NET_F_HOST_TSO6, + VIRTIO_NET_F_HOST_ECN, + VIRTIO_NET_F_HOST_UFO, + VIRTIO_NET_F_MRG_RXBUF, + VIRTIO_NET_F_STATUS, + VIRTIO_NET_F_CTRL_VQ, + VIRTIO_NET_F_CTRL_RX, + VIRTIO_NET_F_CTRL_VLAN, + VIRTIO_NET_F_CTRL_RX_EXTRA, + VIRTIO_NET_F_CTRL_MAC_ADDR, + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, + + VIRTIO_NET_F_MQ, + + VHOST_INVALID_FEATURE_BIT +}; + +static const int *vhost_net_get_feature_bits(struct vhost_net *net) { - /* Clear features not supported by host kernel. 
*/ - if (!(net->dev.features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY))) { - features &= ~(1 << VIRTIO_F_NOTIFY_ON_EMPTY); - } - if (!(net->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))) { - features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); - } - if (!(net->dev.features & (1 << VIRTIO_RING_F_EVENT_IDX))) { - features &= ~(1 << VIRTIO_RING_F_EVENT_IDX); - } - if (!(net->dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF))) { - features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF); + const int *feature_bits = 0; + + switch (net->nc->info->type) { + case NET_CLIENT_OPTIONS_KIND_TAP: + feature_bits = kernel_feature_bits; + break; + case NET_CLIENT_OPTIONS_KIND_VHOST_USER: + feature_bits = user_feature_bits; + break; + default: + error_report("Feature bits not defined for this type: %d", + net->nc->info->type); + break; } - return features; + + return feature_bits; +} + +unsigned vhost_net_get_features(struct vhost_net *net, unsigned features) +{ + return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net), + features); } void vhost_net_ack_features(struct vhost_net *net, unsigned features) { - net->dev.acked_features = net->dev.backend_features; - if (features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) { - net->dev.acked_features |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY); - } - if (features & (1 << VIRTIO_RING_F_INDIRECT_DESC)) { - net->dev.acked_features |= (1 << VIRTIO_RING_F_INDIRECT_DESC); - } - if (features & (1 << VIRTIO_RING_F_EVENT_IDX)) { - net->dev.acked_features |= (1 << VIRTIO_RING_F_EVENT_IDX); - } - if (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) { - net->dev.acked_features |= (1 << VIRTIO_NET_F_MRG_RXBUF); - } + vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features); } static int vhost_net_get_fd(NetClientState *backend) @@ -92,42 +129,52 @@ static int vhost_net_get_fd(NetClientState *backend) } } -struct vhost_net *vhost_net_init(NetClientState *backend, int devfd, - bool force) +struct vhost_net *vhost_net_init(VhostNetOptions *options) { int r; + bool 
backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; struct vhost_net *net = g_malloc(sizeof *net); - if (!backend) { - fprintf(stderr, "vhost-net requires backend to be setup\n"); + + if (!options->net_backend) { + fprintf(stderr, "vhost-net requires net backend to be setup\n"); goto fail; } - r = vhost_net_get_fd(backend); - if (r < 0) { - goto fail; + + if (backend_kernel) { + r = vhost_net_get_fd(options->net_backend); + if (r < 0) { + goto fail; + } + net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend) + ? 0 : (1 << VHOST_NET_F_VIRTIO_NET_HDR); + net->backend = r; + } else { + net->dev.backend_features = 0; + net->backend = -1; } - net->nc = backend; - net->dev.backend_features = qemu_has_vnet_hdr(backend) ? 0 : - (1 << VHOST_NET_F_VIRTIO_NET_HDR); - net->backend = r; + net->nc = options->net_backend; net->dev.nvqs = 2; net->dev.vqs = net->vqs; - r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force); + r = vhost_dev_init(&net->dev, options->opaque, + options->backend_type, options->force); if (r < 0) { goto fail; } - if (!qemu_has_vnet_hdr_len(backend, + if (!qemu_has_vnet_hdr_len(options->net_backend, sizeof(struct virtio_net_hdr_mrg_rxbuf))) { net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF); } - if (~net->dev.features & net->dev.backend_features) { - fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n", - (uint64_t)(~net->dev.features & net->dev.backend_features)); - vhost_dev_cleanup(&net->dev); - goto fail; + if (backend_kernel) { + if (~net->dev.features & net->dev.backend_features) { + fprintf(stderr, "vhost lacks feature mask %" PRIu64 + " for backend\n", + (uint64_t)(~net->dev.features & net->dev.backend_features)); + vhost_dev_cleanup(&net->dev); + goto fail; + } } - /* Set sane init value. Override when guest acks. 
*/ vhost_net_ack_features(net, 0); return net; @@ -166,24 +213,37 @@ static int vhost_net_start_one(struct vhost_net *net, goto fail_start; } - net->nc->info->poll(net->nc, false); - qemu_set_fd_handler(net->backend, NULL, NULL, NULL); - file.fd = net->backend; - for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { - r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file); - if (r < 0) { - r = -errno; - goto fail; + if (net->nc->info->poll) { + net->nc->info->poll(net->nc, false); + } + + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { + qemu_set_fd_handler(net->backend, NULL, NULL, NULL); + file.fd = net->backend; + for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { + const VhostOps *vhost_ops = net->dev.vhost_ops; + r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, + &file); + if (r < 0) { + r = -errno; + goto fail; + } } } return 0; fail: file.fd = -1; - while (file.index-- > 0) { - int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file); - assert(r >= 0); + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { + while (file.index-- > 0) { + const VhostOps *vhost_ops = net->dev.vhost_ops; + int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, + &file); + assert(r >= 0); + } + } + if (net->nc->info->poll) { + net->nc->info->poll(net->nc, true); } - net->nc->info->poll(net->nc, true); vhost_dev_stop(&net->dev, dev); fail_start: vhost_dev_disable_notifiers(&net->dev, dev); @@ -200,11 +260,17 @@ static void vhost_net_stop_one(struct vhost_net *net, return; } - for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { - int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file); - assert(r >= 0); + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { + for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { + const VhostOps *vhost_ops = net->dev.vhost_ops; + int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, + &file); + assert(r >= 0); + } + } + if 
(net->nc->info->poll) { + net->nc->info->poll(net->nc, true); } - net->nc->info->poll(net->nc, true); vhost_dev_stop(&net->dev, dev); vhost_dev_disable_notifiers(&net->dev, dev); } @@ -224,7 +290,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, } for (i = 0; i < total_queues; i++) { - r = vhost_net_start_one(tap_get_vhost_net(ncs[i].peer), dev, i * 2); + r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev, i * 2); if (r < 0) { goto err; @@ -241,7 +307,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, err: while (--i >= 0) { - vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev); + vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); } return r; } @@ -262,7 +328,7 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, assert(r >= 0); for (i = 0; i < total_queues; i++) { - vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev); + vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); } } @@ -282,9 +348,30 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, { vhost_virtqueue_mask(&net->dev, dev, idx, mask); } + +VHostNetState *get_vhost_net(NetClientState *nc) +{ + VHostNetState *vhost_net = 0; + + if (!nc) { + return 0; + } + + switch (nc->info->type) { + case NET_CLIENT_OPTIONS_KIND_TAP: + vhost_net = tap_get_vhost_net(nc); + break; + case NET_CLIENT_OPTIONS_KIND_VHOST_USER: + vhost_net = vhost_user_get_vhost_net(nc); + break; + default: + break; + } + + return vhost_net; +} #else -struct vhost_net *vhost_net_init(NetClientState *backend, int devfd, - bool force) +struct vhost_net *vhost_net_init(VhostNetOptions *options) { error_report("vhost-net support is not compiled in"); return NULL; @@ -328,4 +415,9 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, int idx, bool mask) { } + +VHostNetState *get_vhost_net(NetClientState *nc) +{ + return 0; +} #endif diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 940a7cfe54..d8588f3808 100644 --- a/hw/net/virtio-net.c +++ 
b/hw/net/virtio-net.c @@ -99,20 +99,23 @@ static bool virtio_net_started(VirtIONet *n, uint8_t status) (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; } +static void virtio_net_announce_timer(void *opaque) +{ + VirtIONet *n = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + + n->announce_counter--; + n->status |= VIRTIO_NET_S_ANNOUNCE; + virtio_notify_config(vdev); +} + static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) { VirtIODevice *vdev = VIRTIO_DEVICE(n); NetClientState *nc = qemu_get_queue(n->nic); int queues = n->multiqueue ? n->max_queues : 1; - if (!nc->peer) { - return; - } - if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) { - return; - } - - if (!tap_get_vhost_net(nc->peer)) { + if (!get_vhost_net(nc->peer)) { return; } @@ -122,7 +125,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) } if (!n->vhost_started) { int r; - if (!vhost_net_query(tap_get_vhost_net(nc->peer), vdev)) { + if (!vhost_net_query(get_vhost_net(nc->peer), vdev)) { return; } n->vhost_started = 1; @@ -322,6 +325,9 @@ static void virtio_net_reset(VirtIODevice *vdev) n->nobcast = 0; /* multiqueue is disabled by default */ n->curr_queues = 1; + timer_del(n->announce_timer); + n->announce_counter = 0; + n->status &= ~VIRTIO_NET_S_ANNOUNCE; /* Flush any MAC and VLAN filter table state */ n->mac_table.in_use = 0; @@ -452,13 +458,10 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features) features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO); } - if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) { + if (!get_vhost_net(nc->peer)) { return features; } - if (!tap_get_vhost_net(nc->peer)) { - return features; - } - return vhost_net_get_features(tap_get_vhost_net(nc->peer), features); + return vhost_net_get_features(get_vhost_net(nc->peer), features); } static uint32_t virtio_net_bad_features(VirtIODevice *vdev) @@ -522,13 +525,10 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features) for 
(i = 0; i < n->max_queues; i++) { NetClientState *nc = qemu_get_subqueue(n->nic, i); - if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) { - continue; - } - if (!tap_get_vhost_net(nc->peer)) { + if (!get_vhost_net(nc->peer)) { continue; } - vhost_net_ack_features(tap_get_vhost_net(nc->peer), features); + vhost_net_ack_features(get_vhost_net(nc->peer), features); } if ((1 << VIRTIO_NET_F_CTRL_VLAN) & features) { @@ -731,6 +731,23 @@ static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, return VIRTIO_NET_OK; } +static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, + struct iovec *iov, unsigned int iov_cnt) +{ + if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && + n->status & VIRTIO_NET_S_ANNOUNCE) { + n->status &= ~VIRTIO_NET_S_ANNOUNCE; + if (n->announce_counter) { + timer_mod(n->announce_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + self_announce_delay(n->announce_counter)); + } + return VIRTIO_NET_OK; + } else { + return VIRTIO_NET_ERR; + } +} + static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, struct iovec *iov, unsigned int iov_cnt) { @@ -794,6 +811,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); + } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { + status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt); } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { @@ -1451,6 +1470,12 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) qemu_get_subqueue(n->nic, i)->link_down = link_down; } + if (vdev->guest_features & (0x1 << VIRTIO_NET_F_GUEST_ANNOUNCE) && + vdev->guest_features & (0x1 << VIRTIO_NET_F_CTRL_VQ)) { + n->announce_counter = SELF_ANNOUNCE_ROUNDS; + 
timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL)); + } + return 0; } @@ -1476,7 +1501,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) VirtIONet *n = VIRTIO_NET(vdev); NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); assert(n->vhost_started); - return vhost_net_virtqueue_pending(tap_get_vhost_net(nc->peer), idx); + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); } static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, @@ -1485,7 +1510,7 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, VirtIONet *n = VIRTIO_NET(vdev); NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); assert(n->vhost_started); - vhost_net_virtqueue_mask(tap_get_vhost_net(nc->peer), + vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); } @@ -1509,18 +1534,9 @@ void virtio_net_set_netclient_name(VirtIONet *n, const char *name, */ assert(type != NULL); - if (n->netclient_name) { - g_free(n->netclient_name); - n->netclient_name = NULL; - } - if (n->netclient_type) { - g_free(n->netclient_type); - n->netclient_type = NULL; - } - - if (name != NULL) { - n->netclient_name = g_strdup(name); - } + g_free(n->netclient_name); + g_free(n->netclient_type); + n->netclient_name = g_strdup(name); n->netclient_type = g_strdup(type); } @@ -1562,6 +1578,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); n->status = VIRTIO_NET_S_LINK_UP; + n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + virtio_net_announce_timer, n); if (n->netclient_type) { /* @@ -1616,14 +1634,10 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) unregister_savevm(dev, "virtio-net", n); - if (n->netclient_name) { - g_free(n->netclient_name); - n->netclient_name = NULL; - } - if (n->netclient_type) { - g_free(n->netclient_type); - 
n->netclient_type = NULL; - } + g_free(n->netclient_name); + n->netclient_name = NULL; + g_free(n->netclient_type); + n->netclient_type = NULL; g_free(n->mac_table.macs); g_free(n->vlans); @@ -1642,6 +1656,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) } } + timer_del(n->announce_timer); + timer_free(n->announce_timer); g_free(n->vqs); qemu_del_nic(n->nic); virtio_cleanup(vdev); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e06321cf15..82f183f173 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -673,8 +673,8 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) int i, off; /* memory node(s) */ - if (nb_numa_nodes > 1 && node_mem[0] < ram_size) { - node0_size = node_mem[0]; + if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) { + node0_size = numa_info[0].node_mem; } else { node0_size = ram_size; } @@ -712,7 +712,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) if (mem_start >= ram_size) { node_size = 0; } else { - node_size = node_mem[i]; + node_size = numa_info[i].node_mem; if (node_size > ram_size - mem_start) { node_size = ram_size - mem_start; } @@ -857,7 +857,8 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) /* Update the RMA size if necessary */ if (spapr->vrma_adjust) { - hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size; + hwaddr node0_size = (nb_numa_nodes > 1) ? + numa_info[0].node_mem : ram_size; spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift); } } @@ -1289,7 +1290,7 @@ static void ppc_spapr_init(MachineState *machine) MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ram = g_new(MemoryRegion, 1); hwaddr rma_alloc_size; - hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size; + hwaddr node0_size = (nb_numa_nodes > 1) ? 
numa_info[0].node_mem : ram_size; uint32_t initrd_base = 0; long kernel_size = 0, initrd_size = 0; long load_limit, rtas_limit, fw_size; diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index b05c47abdc..c68a873f18 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -294,6 +294,7 @@ static void megasas_unmap_sgl(MegasasCmd *cmd) static int megasas_build_sense(MegasasCmd *cmd, uint8_t *sense_ptr, uint8_t sense_len) { + PCIDevice *pcid = PCI_DEVICE(cmd->state); uint32_t pa_hi = 0, pa_lo; hwaddr pa; @@ -306,7 +307,7 @@ static int megasas_build_sense(MegasasCmd *cmd, uint8_t *sense_ptr, pa_hi = le32_to_cpu(cmd->frame->pass.sense_addr_hi); } pa = ((uint64_t) pa_hi << 32) | pa_lo; - cpu_physical_memory_write(pa, sense_ptr, sense_len); + pci_dma_write(pcid, pa, sense_ptr, sense_len); cmd->frame->header.sense_len = sense_len; } return sense_len; @@ -472,6 +473,7 @@ static MegasasCmd *megasas_next_frame(MegasasState *s, static MegasasCmd *megasas_enqueue_frame(MegasasState *s, hwaddr frame, uint64_t context, int count) { + PCIDevice *pcid = PCI_DEVICE(s); MegasasCmd *cmd = NULL; int frame_size = MFI_FRAME_SIZE * 16; hwaddr frame_size_p = frame_size; @@ -484,11 +486,11 @@ static MegasasCmd *megasas_enqueue_frame(MegasasState *s, if (!cmd->pa) { cmd->pa = frame; /* Map all possible frames */ - cmd->frame = cpu_physical_memory_map(frame, &frame_size_p, 0); + cmd->frame = pci_dma_map(pcid, frame, &frame_size_p, 0); if (frame_size_p != frame_size) { trace_megasas_qf_map_failed(cmd->index, (unsigned long)frame); if (cmd->frame) { - cpu_physical_memory_unmap(cmd->frame, frame_size_p, 0, 0); + pci_dma_unmap(pcid, cmd->frame, frame_size_p, 0, 0); cmd->frame = NULL; cmd->pa = 0; } @@ -561,13 +563,14 @@ static void megasas_complete_frame(MegasasState *s, uint64_t context) static void megasas_reset_frames(MegasasState *s) { + PCIDevice *pcid = PCI_DEVICE(s); int i; MegasasCmd *cmd; for (i = 0; i < s->fw_cmds; i++) { cmd = &s->frames[i]; if (cmd->pa) { - 
cpu_physical_memory_unmap(cmd->frame, cmd->pa_size, 0, 0); + pci_dma_unmap(pcid, cmd->frame, cmd->pa_size, 0, 0); cmd->frame = NULL; cmd->pa = 0; } @@ -584,6 +587,7 @@ static void megasas_abort_command(MegasasCmd *cmd) static int megasas_init_firmware(MegasasState *s, MegasasCmd *cmd) { + PCIDevice *pcid = PCI_DEVICE(s); uint32_t pa_hi, pa_lo; hwaddr iq_pa, initq_size; struct mfi_init_qinfo *initq; @@ -595,7 +599,7 @@ static int megasas_init_firmware(MegasasState *s, MegasasCmd *cmd) iq_pa = (((uint64_t) pa_hi << 32) | pa_lo); trace_megasas_init_firmware((uint64_t)iq_pa); initq_size = sizeof(*initq); - initq = cpu_physical_memory_map(iq_pa, &initq_size, 0); + initq = pci_dma_map(pcid, iq_pa, &initq_size, 0); if (!initq || initq_size != sizeof(*initq)) { trace_megasas_initq_map_failed(cmd->index); s->event_count++; @@ -631,7 +635,7 @@ static int megasas_init_firmware(MegasasState *s, MegasasCmd *cmd) s->fw_state = MFI_FWSTATE_OPERATIONAL; out: if (initq) { - cpu_physical_memory_unmap(initq, initq_size, 0, 0); + pci_dma_unmap(pcid, initq, initq_size, 0, 0); } return ret; } diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 003d2843df..ea1ac09c8a 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -1429,7 +1429,7 @@ int scsi_build_sense(uint8_t *in_buf, int in_len, } } -static const char *scsi_command_name(uint8_t cmd) +const char *scsi_command_name(uint8_t cmd) { static const char *names[] = { [ TEST_UNIT_READY ] = "TEST_UNIT_READY", @@ -1545,6 +1545,8 @@ static const char *scsi_command_name(uint8_t cmd) [ SET_READ_AHEAD ] = "SET_READ_AHEAD", [ ALLOW_OVERWRITE ] = "ALLOW_OVERWRITE", [ MECHANISM_STATUS ] = "MECHANISM_STATUS", + [ GET_EVENT_STATUS_NOTIFICATION ] = "GET_EVENT_STATUS_NOTIFICATION", + [ READ_DISC_INFORMATION ] = "READ_DISC_INFORMATION", }; if (cmd >= ARRAY_SIZE(names) || names[cmd] == NULL) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index fc6e32ada5..a529ad24c7 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ 
-2015,7 +2015,7 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) case VERIFY_10: case VERIFY_12: case VERIFY_16: - DPRINTF("Verify (bytchk %lu)\n", (r->req.buf[1] >> 1) & 3); + DPRINTF("Verify (bytchk %d)\n", (req->cmd.buf[1] >> 1) & 3); if (req->cmd.buf[1] & 6) { goto illegal_request; } @@ -2027,7 +2027,8 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) (long)r->req.cmd.xfer); break; default: - DPRINTF("Unknown SCSI command (%2.2x)\n", buf[0]); + DPRINTF("Unknown SCSI command (%2.2x=%s)\n", buf[0], + scsi_command_name(buf[0])); scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE)); return 0; } @@ -2526,7 +2527,7 @@ static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag, * ones (such as WRITE SAME or EXTENDED COPY, etc.). So, without * O_DIRECT everything must go through SG_IO. */ - if (bdrv_get_flags(s->qdev.conf.bs) & BDRV_O_NOCACHE) { + if (!(bdrv_get_flags(s->qdev.conf.bs) & BDRV_O_NOCACHE)) { break; } diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c index f96b7af791..048cfc7b05 100644 --- a/hw/scsi/spapr_vscsi.c +++ b/hw/scsi/spapr_vscsi.c @@ -799,8 +799,9 @@ static int vscsi_queue_cmd(VSCSIState *s, vscsi_req *req) req->sreq = scsi_req_new(sdev, req->qtag, lun, srp->cmd.cdb, req); n = scsi_req_enqueue(req->sreq); - DPRINTF("VSCSI: Queued command tag 0x%x CMD 0x%x LUN %d ret: %d\n", - req->qtag, srp->cmd.cdb[0], lun, n); + DPRINTF("VSCSI: Queued command tag 0x%x CMD 0x%x=%s LUN %d ret: %d\n", + req->qtag, srp->cmd.cdb[0], scsi_command_name(srp->cmd.cdb[0]), + lun, n); if (n) { /* Transfer direction must be set before preprocessing the diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 668bafa72a..ddfe76aed0 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -24,15 +24,25 @@ #include "hw/virtio/virtio-scsi.h" #include "hw/virtio/virtio-bus.h" +/* Features supported by host kernel. 
*/ +static const int kernel_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_SCSI_F_HOTPLUG, + VHOST_INVALID_FEATURE_BIT +}; + static int vhost_scsi_set_endpoint(VHostSCSI *s) { VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + const VhostOps *vhost_ops = s->dev.vhost_ops; struct vhost_scsi_target backend; int ret; memset(&backend, 0, sizeof(backend)); pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn); - ret = ioctl(s->dev.control, VHOST_SCSI_SET_ENDPOINT, &backend); + ret = vhost_ops->vhost_call(&s->dev, VHOST_SCSI_SET_ENDPOINT, &backend); if (ret < 0) { return -errno; } @@ -43,10 +53,11 @@ static void vhost_scsi_clear_endpoint(VHostSCSI *s) { VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); struct vhost_scsi_target backend; + const VhostOps *vhost_ops = s->dev.vhost_ops; memset(&backend, 0, sizeof(backend)); pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn); - ioctl(s->dev.control, VHOST_SCSI_CLEAR_ENDPOINT, &backend); + vhost_ops->vhost_call(&s->dev, VHOST_SCSI_CLEAR_ENDPOINT, &backend); } static int vhost_scsi_start(VHostSCSI *s) @@ -55,13 +66,15 @@ static int vhost_scsi_start(VHostSCSI *s) VirtIODevice *vdev = VIRTIO_DEVICE(s); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + const VhostOps *vhost_ops = s->dev.vhost_ops; if (!k->set_guest_notifiers) { error_report("binding does not support guest notifiers"); return -ENOSYS; } - ret = ioctl(s->dev.control, VHOST_SCSI_GET_ABI_VERSION, &abi_version); + ret = vhost_ops->vhost_call(&s->dev, + VHOST_SCSI_GET_ABI_VERSION, &abi_version); if (ret < 0) { return -errno; } @@ -141,21 +154,7 @@ static uint32_t vhost_scsi_get_features(VirtIODevice *vdev, { VHostSCSI *s = VHOST_SCSI(vdev); - /* Clear features not supported by host kernel. 
*/ - if (!(s->dev.features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY))) { - features &= ~(1 << VIRTIO_F_NOTIFY_ON_EMPTY); - } - if (!(s->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))) { - features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); - } - if (!(s->dev.features & (1 << VIRTIO_RING_F_EVENT_IDX))) { - features &= ~(1 << VIRTIO_RING_F_EVENT_IDX); - } - if (!(s->dev.features & (1 << VIRTIO_SCSI_F_HOTPLUG))) { - features &= ~(1 << VIRTIO_SCSI_F_HOTPLUG); - } - - return features; + return vhost_get_features(&s->dev, kernel_feature_bits, features); } static void vhost_scsi_set_config(VirtIODevice *vdev, @@ -196,6 +195,10 @@ static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val) } } +static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +} + static void vhost_scsi_realize(DeviceState *dev, Error **errp) { VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); @@ -215,9 +218,18 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) error_setg(errp, "vhost-scsi: unable to parse vhostfd"); return; } + } else { + vhostfd = open("/dev/vhost-scsi", O_RDWR); + if (vhostfd < 0) { + error_setg(errp, "vhost-scsi: open vhost char device failed: %s", + strerror(errno)); + return; + } } - virtio_scsi_common_realize(dev, &err); + virtio_scsi_common_realize(dev, &err, vhost_dummy_handle_output, + vhost_dummy_handle_output, + vhost_dummy_handle_output); if (err != NULL) { error_propagate(errp, err); return; @@ -227,7 +239,8 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); s->dev.vq_index = 0; - ret = vhost_dev_init(&s->dev, vhostfd, "/dev/vhost-scsi", true); + ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)vhostfd, + VHOST_BACKEND_TYPE_KERNEL, true); if (ret < 0) { error_setg(errp, "vhost-scsi: vhost initialization failed: %s", strerror(-ret)); diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index b39880a9cd..8c8c9d1f61 100644 --- a/hw/scsi/virtio-scsi.c +++ 
b/hw/scsi/virtio-scsi.c @@ -15,6 +15,7 @@ #include "hw/virtio/virtio-scsi.h" #include "qemu/error-report.h" +#include "qemu/iov.h" #include <hw/scsi/scsi.h> #include <block/scsi.h> #include <hw/virtio/virtio-bus.h> @@ -25,21 +26,28 @@ typedef struct VirtIOSCSIReq { VirtQueueElement elem; QEMUSGList qsgl; SCSIRequest *sreq; + size_t resp_size; + enum SCSIXferMode mode; + QEMUIOVector resp_iov; union { - char *buf; - VirtIOSCSICmdReq *cmd; - VirtIOSCSICtrlTMFReq *tmf; - VirtIOSCSICtrlANReq *an; - } req; - union { - char *buf; - VirtIOSCSICmdResp *cmd; - VirtIOSCSICtrlTMFResp *tmf; - VirtIOSCSICtrlANResp *an; - VirtIOSCSIEvent *event; + VirtIOSCSICmdResp cmd; + VirtIOSCSICtrlTMFResp tmf; + VirtIOSCSICtrlANResp an; + VirtIOSCSIEvent event; } resp; + union { + struct { + VirtIOSCSICmdReq cmd; + uint8_t cdb[]; + } QEMU_PACKED; + VirtIOSCSICtrlTMFReq tmf; + VirtIOSCSICtrlANReq an; + } req; } VirtIOSCSIReq; +QEMU_BUILD_BUG_ON(offsetof(VirtIOSCSIReq, req.cdb) != + offsetof(VirtIOSCSIReq, req.cmd) + sizeof(VirtIOSCSICmdReq)); + static inline int virtio_scsi_get_lun(uint8_t *lun) { return ((lun[2] << 8) | lun[3]) & 0x3FFF; @@ -56,18 +64,41 @@ static inline SCSIDevice *virtio_scsi_device_find(VirtIOSCSI *s, uint8_t *lun) return scsi_device_find(&s->bus, 0, lun[1], virtio_scsi_get_lun(lun)); } +static VirtIOSCSIReq *virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq) +{ + VirtIOSCSIReq *req; + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + + req = g_malloc0(sizeof(*req) + vs->cdb_size); + + req->vq = vq; + req->dev = s; + req->sreq = NULL; + qemu_sglist_init(&req->qsgl, DEVICE(s), 8, &address_space_memory); + qemu_iovec_init(&req->resp_iov, 1); + return req; +} + +static void virtio_scsi_free_req(VirtIOSCSIReq *req) +{ + qemu_iovec_destroy(&req->resp_iov); + qemu_sglist_destroy(&req->qsgl); + g_free(req); +} + static void virtio_scsi_complete_req(VirtIOSCSIReq *req) { VirtIOSCSI *s = req->dev; VirtQueue *vq = req->vq; VirtIODevice *vdev = VIRTIO_DEVICE(s); - virtqueue_push(vq, 
&req->elem, req->qsgl.size + req->elem.in_sg[0].iov_len); - qemu_sglist_destroy(&req->qsgl); + + qemu_iovec_from_buf(&req->resp_iov, 0, &req->resp, req->resp_size); + virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size); if (req->sreq) { req->sreq->hba_private = NULL; scsi_req_unref(req->sreq); } - g_free(req); + virtio_scsi_free_req(req); virtio_notify(vdev, vq); } @@ -77,50 +108,73 @@ static void virtio_scsi_bad_req(void) exit(1); } -static void qemu_sgl_init_external(VirtIOSCSIReq *req, struct iovec *sg, - hwaddr *addr, int num) +static size_t qemu_sgl_concat(VirtIOSCSIReq *req, struct iovec *iov, + hwaddr *addr, int num, size_t skip) { QEMUSGList *qsgl = &req->qsgl; - - qemu_sglist_init(qsgl, DEVICE(req->dev), num, &address_space_memory); - while (num--) { - qemu_sglist_add(qsgl, *(addr++), (sg++)->iov_len); + size_t copied = 0; + + while (num) { + if (skip >= iov->iov_len) { + skip -= iov->iov_len; + } else { + qemu_sglist_add(qsgl, *addr + skip, iov->iov_len - skip); + copied += iov->iov_len - skip; + skip = 0; + } + iov++; + addr++; + num--; } + + assert(skip == 0); + return copied; } -static void virtio_scsi_parse_req(VirtIOSCSI *s, VirtQueue *vq, - VirtIOSCSIReq *req) +static int virtio_scsi_parse_req(VirtIOSCSIReq *req, + unsigned req_size, unsigned resp_size) { - assert(req->elem.in_num); - req->vq = vq; - req->dev = s; - req->sreq = NULL; - if (req->elem.out_num) { - req->req.buf = req->elem.out_sg[0].iov_base; + size_t in_size, out_size; + + if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0, + &req->req, req_size) < req_size) { + return -EINVAL; } - req->resp.buf = req->elem.in_sg[0].iov_base; - if (req->elem.out_num > 1) { - qemu_sgl_init_external(req, &req->elem.out_sg[1], - &req->elem.out_addr[1], - req->elem.out_num - 1); - } else { - qemu_sgl_init_external(req, &req->elem.in_sg[1], - &req->elem.in_addr[1], - req->elem.in_num - 1); + if (qemu_iovec_concat_iov(&req->resp_iov, + req->elem.in_sg, req->elem.in_num, 0, + resp_size) < 
resp_size) { + return -EINVAL; + } + req->resp_size = resp_size; + + out_size = qemu_sgl_concat(req, req->elem.out_sg, + &req->elem.out_addr[0], req->elem.out_num, + req_size); + in_size = qemu_sgl_concat(req, req->elem.in_sg, + &req->elem.in_addr[0], req->elem.in_num, + resp_size); + + if (out_size && in_size) { + return -ENOTSUP; + } + + if (out_size) { + req->mode = SCSI_XFER_TO_DEV; + } else if (in_size) { + req->mode = SCSI_XFER_FROM_DEV; } + + return 0; } static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq) { - VirtIOSCSIReq *req; - req = g_malloc(sizeof(*req)); + VirtIOSCSIReq *req = virtio_scsi_init_req(s, vq); if (!virtqueue_pop(vq, &req->elem)) { - g_free(req); + virtio_scsi_free_req(req); return NULL; } - - virtio_scsi_parse_req(s, vq, req); return req; } @@ -143,9 +197,9 @@ static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq) VirtIOSCSIReq *req; uint32_t n; - req = g_malloc(sizeof(*req)); qemu_get_be32s(f, &n); assert(n < vs->conf.num_queues); + req = virtio_scsi_init_req(s, vs->cmd_vqs[n]); qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem)); /* TODO: add a way for SCSIBusInfo's load_request to fail, * and fail migration instead of asserting here. @@ -156,41 +210,44 @@ static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq) #endif assert(req->elem.in_num <= ARRAY_SIZE(req->elem.in_sg)); assert(req->elem.out_num <= ARRAY_SIZE(req->elem.out_sg)); - virtio_scsi_parse_req(s, vs->cmd_vqs[n], req); + + if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size, + sizeof(VirtIOSCSICmdResp) + vs->sense_size) < 0) { + error_report("invalid SCSI request migration data"); + exit(1); + } scsi_req_ref(sreq); req->sreq = sreq; if (req->sreq->cmd.mode != SCSI_XFER_NONE) { - int req_mode = - (req->elem.in_num > 1 ? 
SCSI_XFER_FROM_DEV : SCSI_XFER_TO_DEV); - - assert(req->sreq->cmd.mode == req_mode); + assert(req->sreq->cmd.mode == req->mode); } return req; } static void virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) { - SCSIDevice *d = virtio_scsi_device_find(s, req->req.tmf->lun); + SCSIDevice *d = virtio_scsi_device_find(s, req->req.tmf.lun); SCSIRequest *r, *next; BusChild *kid; int target; /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */ - req->resp.tmf->response = VIRTIO_SCSI_S_OK; + req->resp.tmf.response = VIRTIO_SCSI_S_OK; - switch (req->req.tmf->subtype) { + tswap32s(&req->req.tmf.subtype); + switch (req->req.tmf.subtype) { case VIRTIO_SCSI_T_TMF_ABORT_TASK: case VIRTIO_SCSI_T_TMF_QUERY_TASK: if (!d) { goto fail; } - if (d->lun != virtio_scsi_get_lun(req->req.tmf->lun)) { + if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { goto incorrect_lun; } QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) { VirtIOSCSIReq *cmd_req = r->hba_private; - if (cmd_req && cmd_req->req.cmd->tag == req->req.tmf->tag) { + if (cmd_req && cmd_req->req.cmd.tag == req->req.tmf.tag) { break; } } @@ -200,11 +257,11 @@ static void virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) * check for it in the loop above. */ assert(r->hba_private); - if (req->req.tmf->subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK) { + if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK) { /* "If the specified command is present in the task set, then * return a service response set to FUNCTION SUCCEEDED". 
*/ - req->resp.tmf->response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED; + req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED; } else { scsi_req_cancel(r); } @@ -215,7 +272,7 @@ static void virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) if (!d) { goto fail; } - if (d->lun != virtio_scsi_get_lun(req->req.tmf->lun)) { + if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { goto incorrect_lun; } s->resetting++; @@ -229,16 +286,16 @@ static void virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) if (!d) { goto fail; } - if (d->lun != virtio_scsi_get_lun(req->req.tmf->lun)) { + if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { goto incorrect_lun; } QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) { if (r->hba_private) { - if (req->req.tmf->subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK_SET) { + if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK_SET) { /* "If there is any command present in the task set, then * return a service response set to FUNCTION SUCCEEDED". */ - req->resp.tmf->response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED; + req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED; break; } else { scsi_req_cancel(r); @@ -248,7 +305,7 @@ static void virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) break; case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: - target = req->req.tmf->lun[1]; + target = req->req.tmf.lun[1]; s->resetting++; QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { d = DO_UPCAST(SCSIDevice, qdev, kid->child); @@ -261,18 +318,18 @@ static void virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) case VIRTIO_SCSI_T_TMF_CLEAR_ACA: default: - req->resp.tmf->response = VIRTIO_SCSI_S_FUNCTION_REJECTED; + req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; break; } return; incorrect_lun: - req->resp.tmf->response = VIRTIO_SCSI_S_INCORRECT_LUN; + req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN; return; fail: - req->resp.tmf->response = VIRTIO_SCSI_S_BAD_TARGET; + req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET; } static void 
virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) @@ -281,57 +338,70 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) VirtIOSCSIReq *req; while ((req = virtio_scsi_pop_req(s, vq))) { - int out_size, in_size; - if (req->elem.out_num < 1 || req->elem.in_num < 1) { + int type; + + if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0, + &type, sizeof(type)) < sizeof(type)) { virtio_scsi_bad_req(); continue; } - out_size = req->elem.out_sg[0].iov_len; - in_size = req->elem.in_sg[0].iov_len; - if (req->req.tmf->type == VIRTIO_SCSI_T_TMF) { - if (out_size < sizeof(VirtIOSCSICtrlTMFReq) || - in_size < sizeof(VirtIOSCSICtrlTMFResp)) { + tswap32s(&req->req.tmf.type); + if (req->req.tmf.type == VIRTIO_SCSI_T_TMF) { + if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlTMFReq), + sizeof(VirtIOSCSICtrlTMFResp)) < 0) { virtio_scsi_bad_req(); + } else { + virtio_scsi_do_tmf(s, req); } - virtio_scsi_do_tmf(s, req); - } else if (req->req.tmf->type == VIRTIO_SCSI_T_AN_QUERY || - req->req.tmf->type == VIRTIO_SCSI_T_AN_SUBSCRIBE) { - if (out_size < sizeof(VirtIOSCSICtrlANReq) || - in_size < sizeof(VirtIOSCSICtrlANResp)) { + } else if (req->req.tmf.type == VIRTIO_SCSI_T_AN_QUERY || + req->req.tmf.type == VIRTIO_SCSI_T_AN_SUBSCRIBE) { + if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlANReq), + sizeof(VirtIOSCSICtrlANResp)) < 0) { virtio_scsi_bad_req(); + } else { + req->resp.an.event_actual = 0; + req->resp.an.response = VIRTIO_SCSI_S_OK; } - req->resp.an->event_actual = 0; - req->resp.an->response = VIRTIO_SCSI_S_OK; } virtio_scsi_complete_req(req); } } +static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req) +{ + /* Sense data is not in req->resp and is copied separately + * in virtio_scsi_command_complete. 
+ */ + req->resp_size = sizeof(VirtIOSCSICmdResp); + virtio_scsi_complete_req(req); +} + static void virtio_scsi_command_complete(SCSIRequest *r, uint32_t status, size_t resid) { VirtIOSCSIReq *req = r->hba_private; - VirtIOSCSI *s = req->dev; - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + uint8_t sense[SCSI_SENSE_BUF_SIZE]; uint32_t sense_len; if (r->io_canceled) { return; } - req->resp.cmd->response = VIRTIO_SCSI_S_OK; - req->resp.cmd->status = status; - if (req->resp.cmd->status == GOOD) { - req->resp.cmd->resid = tswap32(resid); + req->resp.cmd.response = VIRTIO_SCSI_S_OK; + req->resp.cmd.status = status; + if (req->resp.cmd.status == GOOD) { + req->resp.cmd.resid = tswap32(resid); } else { - req->resp.cmd->resid = 0; - sense_len = scsi_req_get_sense(r, req->resp.cmd->sense, - vs->sense_size); - req->resp.cmd->sense_len = tswap32(sense_len); + req->resp.cmd.resid = 0; + sense_len = scsi_req_get_sense(r, sense, sizeof(sense)); + sense_len = MIN(sense_len, req->resp_iov.size - sizeof(req->resp.cmd)); + qemu_iovec_from_buf(&req->resp_iov, sizeof(req->resp.cmd), + &req->resp, sense_len); + req->resp.cmd.sense_len = tswap32(sense_len); } - virtio_scsi_complete_req(req); + virtio_scsi_complete_cmd_req(req); } static QEMUSGList *virtio_scsi_get_sg_list(SCSIRequest *r) @@ -349,17 +419,17 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r) return; } if (req->dev->resetting) { - req->resp.cmd->response = VIRTIO_SCSI_S_RESET; + req->resp.cmd.response = VIRTIO_SCSI_S_RESET; } else { - req->resp.cmd->response = VIRTIO_SCSI_S_ABORTED; + req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED; } - virtio_scsi_complete_req(req); + virtio_scsi_complete_cmd_req(req); } static void virtio_scsi_fail_cmd_req(VirtIOSCSIReq *req) { - req->resp.cmd->response = VIRTIO_SCSI_S_FAILURE; - virtio_scsi_complete_req(req); + req->resp.cmd.response = VIRTIO_SCSI_S_FAILURE; + virtio_scsi_complete_cmd_req(req); } static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) @@ -373,43 
+443,35 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) while ((req = virtio_scsi_pop_req(s, vq))) { SCSIDevice *d; - int out_size, in_size; - if (req->elem.out_num < 1 || req->elem.in_num < 1) { - virtio_scsi_bad_req(); - } + int rc; - out_size = req->elem.out_sg[0].iov_len; - in_size = req->elem.in_sg[0].iov_len; - if (out_size < sizeof(VirtIOSCSICmdReq) + vs->cdb_size || - in_size < sizeof(VirtIOSCSICmdResp) + vs->sense_size) { - virtio_scsi_bad_req(); - } - - if (req->elem.out_num > 1 && req->elem.in_num > 1) { - virtio_scsi_fail_cmd_req(req); + rc = virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size, + sizeof(VirtIOSCSICmdResp) + vs->sense_size); + if (rc < 0) { + if (rc == -ENOTSUP) { + virtio_scsi_fail_cmd_req(req); + } else { + virtio_scsi_bad_req(); + } continue; } - d = virtio_scsi_device_find(s, req->req.cmd->lun); + d = virtio_scsi_device_find(s, req->req.cmd.lun); if (!d) { - req->resp.cmd->response = VIRTIO_SCSI_S_BAD_TARGET; - virtio_scsi_complete_req(req); + req->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; + virtio_scsi_complete_cmd_req(req); continue; } - req->sreq = scsi_req_new(d, req->req.cmd->tag, - virtio_scsi_get_lun(req->req.cmd->lun), - req->req.cmd->cdb, req); - - if (req->sreq->cmd.mode != SCSI_XFER_NONE) { - int req_mode = - (req->elem.in_num > 1 ? 
SCSI_XFER_FROM_DEV : SCSI_XFER_TO_DEV); - - if (req->sreq->cmd.mode != req_mode || - req->sreq->cmd.xfer > req->qsgl.size) { - req->resp.cmd->response = VIRTIO_SCSI_S_OVERRUN; - virtio_scsi_complete_req(req); - continue; - } + req->sreq = scsi_req_new(d, req->req.cmd.tag, + virtio_scsi_get_lun(req->req.cmd.lun), + req->req.cdb, req); + + if (req->sreq->cmd.mode != SCSI_XFER_NONE + && (req->sreq->cmd.mode != req->mode || + req->sreq->cmd.xfer > req->qsgl.size)) { + req->resp.cmd.response = VIRTIO_SCSI_S_OVERRUN; + virtio_scsi_complete_cmd_req(req); + continue; } n = scsi_req_enqueue(req->sreq); @@ -513,7 +575,7 @@ static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, return; } - if (req->elem.out_num || req->elem.in_num != 1) { + if (req->elem.out_num) { virtio_scsi_bad_req(); } @@ -522,12 +584,12 @@ static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, s->events_dropped = false; } - in_size = req->elem.in_sg[0].iov_len; + in_size = iov_size(req->elem.in_sg, req->elem.in_num); if (in_size < sizeof(VirtIOSCSIEvent)) { virtio_scsi_bad_req(); } - evt = req->resp.event; + evt = &req->resp.event; memset(evt, 0, sizeof(VirtIOSCSIEvent)); evt->event = event; evt->reason = reason; @@ -605,7 +667,9 @@ static struct SCSIBusInfo virtio_scsi_scsi_info = { .load_request = virtio_scsi_load_request, }; -void virtio_scsi_common_realize(DeviceState *dev, Error **errp) +void virtio_scsi_common_realize(DeviceState *dev, Error **errp, + HandleOutput ctrl, HandleOutput evt, + HandleOutput cmd) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev); @@ -619,12 +683,12 @@ void virtio_scsi_common_realize(DeviceState *dev, Error **errp) s->cdb_size = VIRTIO_SCSI_CDB_SIZE; s->ctrl_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, - virtio_scsi_handle_ctrl); + ctrl); s->event_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, - virtio_scsi_handle_event); + evt); for (i = 0; i < s->conf.num_queues; i++) { s->cmd_vqs[i] = 
virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, - virtio_scsi_handle_cmd); + cmd); } } @@ -635,7 +699,9 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) static int virtio_scsi_id; Error *err = NULL; - virtio_scsi_common_realize(dev, &err); + virtio_scsi_common_realize(dev, &err, virtio_scsi_handle_ctrl, + virtio_scsi_handle_event, + virtio_scsi_handle_cmd); if (err != NULL) { error_propagate(errp, err); return; diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index 1ba53d9cc3..ec9e855bc1 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -5,4 +5,4 @@ common-obj-y += virtio-mmio.o common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ obj-y += virtio.o virtio-balloon.o -obj-$(CONFIG_LINUX) += vhost.o +obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c new file mode 100644 index 0000000000..35316c40d9 --- /dev/null +++ b/hw/virtio/vhost-backend.c @@ -0,0 +1,71 @@ +/* + * vhost-backend + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-backend.h" +#include "qemu/error-report.h" + +#include <sys/ioctl.h> + +extern const VhostOps user_ops; + +static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request, + void *arg) +{ + int fd = (uintptr_t) dev->opaque; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL); + + return ioctl(fd, request, arg); +} + +static int vhost_kernel_init(struct vhost_dev *dev, void *opaque) +{ + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL); + + dev->opaque = opaque; + + return 0; +} + +static int vhost_kernel_cleanup(struct vhost_dev *dev) +{ + int fd = (uintptr_t) dev->opaque; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL); + + return close(fd); +} + +static const VhostOps kernel_ops = { + .backend_type = VHOST_BACKEND_TYPE_KERNEL, + .vhost_call = vhost_kernel_call, + .vhost_backend_init = vhost_kernel_init, + .vhost_backend_cleanup = vhost_kernel_cleanup +}; + +int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) +{ + int r = 0; + + switch (backend_type) { + case VHOST_BACKEND_TYPE_KERNEL: + dev->vhost_ops = &kernel_ops; + break; + case VHOST_BACKEND_TYPE_USER: + dev->vhost_ops = &user_ops; + break; + default: + error_report("Unknown vhost backend type\n"); + r = -1; + } + + return r; +} diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c new file mode 100644 index 0000000000..0df6a936a0 --- /dev/null +++ b/hw/virtio/vhost-user.c @@ -0,0 +1,342 @@ +/* + * vhost-user + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ *
+ */
+
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "sysemu/char.h"
+#include "sysemu/kvm.h"
+#include "qemu/error-report.h"
+#include "qemu/sockets.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <linux/vhost.h>
+
+/* Capacity of the region table (and of the fd array sent alongside it). */
+#define VHOST_MEMORY_MAX_NREGIONS    8
+
+/* Request codes carried in VhostUserMsg.request. */
+typedef enum VhostUserRequest {
+    VHOST_USER_NONE = 0,
+    VHOST_USER_GET_FEATURES = 1,
+    VHOST_USER_SET_FEATURES = 2,
+    VHOST_USER_SET_OWNER = 3,
+    VHOST_USER_RESET_OWNER = 4,
+    VHOST_USER_SET_MEM_TABLE = 5,
+    VHOST_USER_SET_LOG_BASE = 6,
+    VHOST_USER_SET_LOG_FD = 7,
+    VHOST_USER_SET_VRING_NUM = 8,
+    VHOST_USER_SET_VRING_ADDR = 9,
+    VHOST_USER_SET_VRING_BASE = 10,
+    VHOST_USER_GET_VRING_BASE = 11,
+    VHOST_USER_SET_VRING_KICK = 12,
+    VHOST_USER_SET_VRING_CALL = 13,
+    VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_MAX
+} VhostUserRequest;
+
+/* One entry of the memory table sent with VHOST_USER_SET_MEM_TABLE. */
+typedef struct VhostUserMemoryRegion {
+    uint64_t guest_phys_addr;
+    uint64_t memory_size;
+    uint64_t userspace_addr;
+} VhostUserMemoryRegion;
+
+/* Payload of VHOST_USER_SET_MEM_TABLE: a count followed by the regions. */
+typedef struct VhostUserMemory {
+    uint32_t nregions;
+    uint32_t padding;
+    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+/* Message layout: fixed header (request, flags, size) plus a payload union. */
+typedef struct VhostUserMsg {
+    VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK     (0x3)
+#define VHOST_USER_REPLY_MASK       (0x1<<2)
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+    union {
+#define VHOST_USER_VRING_IDX_MASK   (0xff)
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+        uint64_t u64;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        VhostUserMemory memory;
+    };
+} QEMU_PACKED VhostUserMsg;
+
+/* Never read or written: exists only so sizeof() can name message fields. */
+static VhostUserMsg m __attribute__ ((unused));
+#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
+                            + sizeof(m.flags) \
+                            + sizeof(m.size))
+
+#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    (0x1)
+
+/* True when both KVM and KVM eventfd support are enabled. */
+static bool ioeventfd_enabled(void)
+{
+    return kvm_enabled() && kvm_eventfds_enabled();
+}
+
+/* Indexed by VhostUserRequest; holds the vhost ioctl each request mirrors. */
+static const unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
+    -1,                     /* VHOST_USER_NONE */
+    VHOST_GET_FEATURES,     /* VHOST_USER_GET_FEATURES */
+    VHOST_SET_FEATURES,     /* VHOST_USER_SET_FEATURES */
+    VHOST_SET_OWNER,        /* VHOST_USER_SET_OWNER */
+    VHOST_RESET_OWNER,      /* VHOST_USER_RESET_OWNER */
+    VHOST_SET_MEM_TABLE,    /* VHOST_USER_SET_MEM_TABLE */
+    VHOST_SET_LOG_BASE,     /* VHOST_USER_SET_LOG_BASE */
+    VHOST_SET_LOG_FD,       /* VHOST_USER_SET_LOG_FD */
+    VHOST_SET_VRING_NUM,    /* VHOST_USER_SET_VRING_NUM */
+    VHOST_SET_VRING_ADDR,   /* VHOST_USER_SET_VRING_ADDR */
+    VHOST_SET_VRING_BASE,   /* VHOST_USER_SET_VRING_BASE */
+    VHOST_GET_VRING_BASE,   /* VHOST_USER_GET_VRING_BASE */
+    VHOST_SET_VRING_KICK,   /* VHOST_USER_SET_VRING_KICK */
+    VHOST_SET_VRING_CALL,   /* VHOST_USER_SET_VRING_CALL */
+    VHOST_SET_VRING_ERR     /* VHOST_USER_SET_VRING_ERR */
+};
+
+/*
+ * Map a vhost ioctl number to its vhost-user request code.
+ * Returns VHOST_USER_NONE when the ioctl has no entry in the table.
+ */
+static VhostUserRequest vhost_user_request_translate(unsigned long int request)
+{
+    VhostUserRequest idx;
+
+    for (idx = 0; idx < VHOST_USER_MAX; idx++) {
+        if (ioctl_to_vhost_user_request[idx] == request) {
+            break;
+        }
+    }
+
+    return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx;
+}
+
+/*
+ * Read one reply from the character device into @msg.
+ *
+ * The header is read first and validated (flags must be exactly
+ * REPLY | VERSION, size must fit the payload union); only then is the
+ * payload read.  Returns 0 on success, -1 on any short read or bad header.
+ */
+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
+{
+    CharDriverState *chr = dev->opaque;
+    uint8_t *p = (uint8_t *) msg;
+    int r, size = VHOST_USER_HDR_SIZE;
+
+    r = qemu_chr_fe_read_all(chr, p, size);
+    if (r != size) {
+        error_report("Failed to read msg header. Read %d instead of %d.", r,
+                size);
+        return -1;
+    }
+
+    /* validate received flags */
+    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
+        error_report("Failed to read msg header."
+                " Flags 0x%x instead of 0x%x.", msg->flags,
+                VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
+        return -1;
+    }
+
+    /* validate message size is sane */
+    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
+        error_report("Failed to read msg header."
+                " Size %u exceeds the maximum %zu.", msg->size,
+                VHOST_USER_PAYLOAD_SIZE);
+        return -1;
+    }
+
+    if (msg->size) {
+        p += VHOST_USER_HDR_SIZE;
+        size = msg->size;
+        r = qemu_chr_fe_read_all(chr, p, size);
+        if (r != size) {
+            error_report("Failed to read msg payload."
+                         " Read %d instead of %u.", r, msg->size);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Write header + payload of @msg to the character device, attaching
+ * @fd_num descriptors from @fds when there are any.
+ * Returns 0 if the whole message was written, -1 otherwise.
+ */
+static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
+        int *fds, int fd_num)
+{
+    CharDriverState *chr = dev->opaque;
+    int size = VHOST_USER_HDR_SIZE + msg->size;
+
+    if (fd_num) {
+        qemu_chr_fe_set_msgfds(chr, fds, fd_num);
+    }
+
+    return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
+            0 : -1;
+}
+
+/*
+ * Translate a vhost ioctl-style call into a vhost-user message, send it,
+ * and for GET_FEATURES / GET_VRING_BASE read the reply back into @arg.
+ *
+ * @request: a VHOST_* ioctl number; anything not handled below fails.
+ * @arg:     the argument the ioctl would take (read for SET_*, written
+ *           for GET_*; unused for SET_OWNER/RESET_OWNER/SET_MEM_TABLE).
+ *
+ * Returns 0 on success and -1 on failure.  A failed write of a request
+ * that expects no reply still returns 0, as before.
+ */
+static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
+        void *arg)
+{
+    VhostUserMsg msg;
+    VhostUserRequest msg_request;
+    RAMBlock *block = NULL;
+    struct vhost_vring_file *file = NULL;
+    int need_reply = 0;
+    int fds[VHOST_MEMORY_MAX_NREGIONS];
+    size_t fd_num = 0;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+
+    msg_request = vhost_user_request_translate(request);
+    msg.request = msg_request;
+    msg.flags = VHOST_USER_VERSION;
+    msg.size = 0;
+
+    switch (request) {
+    case VHOST_GET_FEATURES:
+        need_reply = 1;
+        break;
+
+    case VHOST_SET_FEATURES:
+    case VHOST_SET_LOG_BASE:
+        msg.u64 = *((__u64 *) arg);
+        msg.size = sizeof(m.u64);
+        break;
+
+    case VHOST_SET_OWNER:
+    case VHOST_RESET_OWNER:
+        break;
+
+    case VHOST_SET_MEM_TABLE:
+        QTAILQ_FOREACH(block, &ram_list.blocks, next)
+        {
+            if (block->fd > 0) {
+                /* fds[] and regions[] both hold at most this many entries */
+                if (fd_num == VHOST_MEMORY_MAX_NREGIONS) {
+                    error_report("Failed preparing vhost-user memory "
+                                 "table msg");
+                    return -1;
+                }
+                msg.memory.regions[fd_num].userspace_addr =
+                    (uintptr_t) block->host;
+                msg.memory.regions[fd_num].memory_size = block->length;
+                msg.memory.regions[fd_num].guest_phys_addr = block->offset;
+                fds[fd_num++] = block->fd;
+            }
+        }
+
+        msg.memory.nregions = fd_num;
+        /* padding is part of the payload sent below; don't leak the stack */
+        msg.memory.padding = 0;
+
+        if (!fd_num) {
+            error_report("Failed initializing vhost-user memory map, "
+                    "consider using -object memory-backend-file share=on");
+            return -1;
+        }
+
+        msg.size = sizeof(m.memory.nregions);
+        msg.size += sizeof(m.memory.padding);
+        msg.size += fd_num * sizeof(VhostUserMemoryRegion);
+
+        break;
+
+    case VHOST_SET_LOG_FD:
+        fds[fd_num++] = *((int *) arg);
+        break;
+
+    case VHOST_SET_VRING_NUM:
+    case VHOST_SET_VRING_BASE:
+        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
+        msg.size = sizeof(m.state);
+        break;
+
+    case VHOST_GET_VRING_BASE:
+        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
+        msg.size = sizeof(m.state);
+        need_reply = 1;
+        break;
+
+    case VHOST_SET_VRING_ADDR:
+        memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
+        msg.size = sizeof(m.addr);
+        break;
+
+    case VHOST_SET_VRING_KICK:
+    case VHOST_SET_VRING_CALL:
+    case VHOST_SET_VRING_ERR:
+        file = arg;
+        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
+        msg.size = sizeof(m.u64);
+        if (ioeventfd_enabled() && file->fd > 0) {
+            fds[fd_num++] = file->fd;
+        } else {
+            /* no usable eventfd: flag it in the payload instead */
+            msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
+        }
+        break;
+    default:
+        error_report("vhost-user trying to send unhandled ioctl");
+        return -1;
+    }
+
+    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
+        /*
+         * A lost one-way request is tolerated as before, but a caller
+         * waiting for data must not be told that @arg was filled in.
+         */
+        return need_reply ? -1 : 0;
+    }
+
+    if (need_reply) {
+        if (vhost_user_read(dev, &msg) < 0) {
+            return -1;
+        }
+
+        if (msg_request != msg.request) {
+            error_report("Received unexpected msg type."
+                    " Expected %d received %d", msg_request, msg.request);
+            return -1;
+        }
+
+        switch (msg_request) {
+        case VHOST_USER_GET_FEATURES:
+            if (msg.size != sizeof(m.u64)) {
+                error_report("Received bad msg size.");
+                return -1;
+            }
+            *((__u64 *) arg) = msg.u64;
+            break;
+        case VHOST_USER_GET_VRING_BASE:
+            if (msg.size != sizeof(m.state)) {
+                error_report("Received bad msg size.");
+                return -1;
+            }
+            memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
+            break;
+        default:
+            error_report("Received unexpected msg type.");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/* Backend init hook: remember @opaque (used above as a CharDriverState). */
+static int vhost_user_init(struct vhost_dev *dev, void *opaque)
+{
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+
+    dev->opaque = opaque;
+
+    return 0;
+}
+
+/* Backend cleanup hook: drop the stored opaque pointer. */
+static int vhost_user_cleanup(struct vhost_dev *dev)
+{
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+
+    dev->opaque = NULL;
+
+    return 0;
+}
+
+const VhostOps user_ops = {
+        .backend_type = VHOST_BACKEND_TYPE_USER,
+        .vhost_call = vhost_user_call,
+        .vhost_backend_init = vhost_user_init,
+        .vhost_backend_cleanup = vhost_user_cleanup
+};
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index f62cfaf38e..c1b1aad6cf 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -13,7 +13,6 @@
  * GNU GPL, version 2 or (at your option) any later version.
*/ -#include <sys/ioctl.h> #include "hw/virtio/vhost.h" #include "hw/hw.h" #include "qemu/atomic.h" @@ -289,15 +288,13 @@ static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size) log = g_malloc0(size * sizeof *log); log_base = (uint64_t)(unsigned long)log; - r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base); assert(r >= 0); /* Sync only the range covered by the old log */ if (dev->log_size) { vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); } - if (dev->log) { - g_free(dev->log); - } + g_free(dev->log); dev->log = log; dev->log_size = size; } @@ -458,7 +455,7 @@ static void vhost_commit(MemoryListener *listener) } if (!dev->log_enabled) { - r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem); assert(r >= 0); dev->memory_changed = false; return; @@ -471,7 +468,7 @@ static void vhost_commit(MemoryListener *listener) if (dev->log_size < log_size) { vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER); } - r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem); assert(r >= 0); /* To log less, can only decrease log size after table update. */ if (dev->log_size > log_size + VHOST_LOG_BUFFER) { @@ -539,7 +536,7 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, .log_guest_addr = vq->used_phys, .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0, }; - int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr); + int r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ADDR, &addr); if (r < 0) { return -errno; } @@ -553,7 +550,7 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log) if (enable_log) { features |= 0x1 << VHOST_F_LOG_ALL; } - r = ioctl(dev->control, VHOST_SET_FEATURES, &features); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_FEATURES, &features); return r < 0 ? 
-errno : 0; } @@ -601,9 +598,7 @@ static int vhost_migration_log(MemoryListener *listener, int enable) if (r < 0) { return r; } - if (dev->log) { - g_free(dev->log); - } + g_free(dev->log); dev->log = NULL; dev->log_size = 0; } else { @@ -668,13 +663,13 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); vq->num = state.num = virtio_queue_get_num(vdev, idx); - r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_NUM, &state); if (r) { return -errno; } state.num = virtio_queue_get_last_avail_idx(vdev, idx); - r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_BASE, &state); if (r) { return -errno; } @@ -716,7 +711,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, } file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq)); - r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_KICK, &file); if (r) { r = -errno; goto fail_kick; @@ -754,7 +749,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, }; int r; assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); - r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state); + r = dev->vhost_ops->vhost_call(dev, VHOST_GET_VRING_BASE, &state); if (r < 0) { fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r); fflush(stderr); @@ -796,7 +791,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, } file.fd = event_notifier_get_fd(&vq->masked_notifier); - r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file); + r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_CALL, &file); if (r) { r = -errno; goto fail_call; @@ -812,25 +807,26 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) event_notifier_cleanup(&vq->masked_notifier); } -int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath, - bool force) +int 
vhost_dev_init(struct vhost_dev *hdev, void *opaque, + VhostBackendType backend_type, bool force) { uint64_t features; int i, r; - if (devfd >= 0) { - hdev->control = devfd; - } else { - hdev->control = open(devpath, O_RDWR); - if (hdev->control < 0) { - return -errno; - } + + if (vhost_set_backend_type(hdev, backend_type) < 0) { + return -1; } - r = ioctl(hdev->control, VHOST_SET_OWNER, NULL); + + if (hdev->vhost_ops->vhost_backend_init(hdev, opaque) < 0) { + return -errno; + } + + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL); if (r < 0) { goto fail; } - r = ioctl(hdev->control, VHOST_GET_FEATURES, &features); + r = hdev->vhost_ops->vhost_call(hdev, VHOST_GET_FEATURES, &features); if (r < 0) { goto fail; } @@ -875,7 +871,7 @@ fail_vq: } fail: r = -errno; - close(hdev->control); + hdev->vhost_ops->vhost_backend_cleanup(hdev); return r; } @@ -888,7 +884,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) memory_listener_unregister(&hdev->memory_listener); g_free(hdev->mem); g_free(hdev->mem_sections); - close(hdev->control); + hdev->vhost_ops->vhost_backend_cleanup(hdev); } bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev) @@ -990,10 +986,37 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, } else { file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq)); } - r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file); + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_VRING_CALL, &file); assert(r >= 0); } +unsigned vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, + unsigned features) +{ + const int *bit = feature_bits; + while (*bit != VHOST_INVALID_FEATURE_BIT) { + unsigned bit_mask = (1 << *bit); + if (!(hdev->features & bit_mask)) { + features &= ~bit_mask; + } + bit++; + } + return features; +} + +void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, + unsigned features) +{ + const int *bit = feature_bits; + while (*bit != VHOST_INVALID_FEATURE_BIT) { + 
unsigned bit_mask = (1 << *bit); + if (features & bit_mask) { + hdev->acked_features |= bit_mask; + } + bit++; + } +} + /* Host notifiers must be enabled at this point. */ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) { @@ -1005,7 +1028,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) if (r < 0) { goto fail_features; } - r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem); + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem); if (r < 0) { r = -errno; goto fail_mem; @@ -1024,8 +1047,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) hdev->log_size = vhost_get_log_size(hdev); hdev->log = hdev->log_size ? g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL; - r = ioctl(hdev->control, VHOST_SET_LOG_BASE, - (uint64_t)(unsigned long)hdev->log); + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, hdev->log); if (r < 0) { r = -errno; goto fail_log; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index a07ae8ad91..a3082d569d 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1164,14 +1164,8 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) { - if (vdev->bus_name) { - g_free(vdev->bus_name); - vdev->bus_name = NULL; - } - - if (bus_name) { - vdev->bus_name = g_strdup(bus_name); - } + g_free(vdev->bus_name); + vdev->bus_name = g_strdup(bus_name); } static void virtio_device_realize(DeviceState *dev, Error **errp) @@ -1206,10 +1200,8 @@ static void virtio_device_unrealize(DeviceState *dev, Error **errp) } } - if (vdev->bus_name) { - g_free(vdev->bus_name); - vdev->bus_name = NULL; - } + g_free(vdev->bus_name); + vdev->bus_name = NULL; } static void virtio_device_class_init(ObjectClass *klass, void *data) diff --git a/include/block/scsi.h b/include/block/scsi.h index 9ab045b613..edde960d18 100644 --- a/include/block/scsi.h +++ b/include/block/scsi.h @@ -143,6 +143,8 @@ #define READ_CD 0xbe 
#define SEND_DVD_STRUCTURE 0xbf +const char *scsi_command_name(uint8_t cmd); + /* * SERVICE ACTION IN subcodes */ diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index e8363d7248..f91581fc65 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -297,11 +297,6 @@ CPUArchState *cpu_copy(CPUArchState *env); /* memory API */ -extern ram_addr_t ram_size; - -/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ -#define RAM_PREALLOC_MASK (1 << 0) - typedef struct RAMBlock { struct MemoryRegion *mr; uint8_t *host; @@ -327,9 +322,6 @@ typedef struct RAMList { } RAMList; extern RAMList ram_list; -extern const char *mem_path; -extern int mem_prealloc; - /* Flags stored in the low bits of the TLB virtual address. These are defined so that fast path ram access is all zeros. */ /* Zero if TLB entry is valid. */ diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index a21b65a893..e3ec4c8e0c 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -45,6 +45,8 @@ typedef uintptr_t ram_addr_t; # define RAM_ADDR_FMT "%" PRIxPTR #endif +extern ram_addr_t ram_size; + /* memory API */ typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value); @@ -54,6 +56,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); /* This should not be used by devices. 
*/ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr); void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev); +void qemu_ram_unset_idstr(ram_addr_t addr); void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf, int len, int is_write); diff --git a/include/exec/memory.h b/include/exec/memory.h index 1d55ad94a4..3d778d70f0 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -31,6 +31,7 @@ #include "qemu/queue.h" #include "qemu/int128.h" #include "qemu/notify.h" +#include "qapi/error.h" #define MAX_PHYS_ADDR_SPACE_BITS 62 #define MAX_PHYS_ADDR (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1) @@ -135,7 +136,7 @@ struct MemoryRegion { const MemoryRegionIOMMUOps *iommu_ops; void *opaque; struct Object *owner; - MemoryRegion *parent; + MemoryRegion *container; Int128 size; hwaddr addr; void (*destructor)(MemoryRegion *mr); @@ -311,6 +312,28 @@ void memory_region_init_ram(MemoryRegion *mr, const char *name, uint64_t size); +#ifdef __linux__ +/** + * memory_region_init_ram_from_file: Initialize RAM memory region with a + * mmap-ed backend. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: the name of the region. + * @size: size of the region. + * @share: %true if memory must be mmaped with the MAP_SHARED flag + * @path: the path in which to allocate the RAM. + * @errp: pointer to Error*, to store an error if it happens. + */ +void memory_region_init_ram_from_file(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + bool share, + const char *path, + Error **errp); +#endif + /** * memory_region_init_ram_ptr: Initialize RAM memory region from a * user-provided pointer. Accesses into the @@ -513,6 +536,16 @@ bool memory_region_is_logging(MemoryRegion *mr); bool memory_region_is_rom(MemoryRegion *mr); /** + * memory_region_get_fd: Get a file descriptor backing a RAM memory region. 
+ * + * Returns a file descriptor backing a file-based RAM memory region, + * or -1 if the region is not a file-based RAM memory region. + * + * @mr: the RAM or alias memory region being queried. + */ +int memory_region_get_fd(MemoryRegion *mr); + +/** * memory_region_get_ram_ptr: Get a pointer into a RAM memory region. * * Returns a host pointer to a RAM memory region (created with @@ -815,11 +848,11 @@ void memory_region_set_enabled(MemoryRegion *mr, bool enabled); /* * memory_region_set_address: dynamically update the address of a region * - * Dynamically updates the address of a region, relative to its parent. + * Dynamically updates the address of a region, relative to its container. * May be used on regions are currently part of a memory hierarchy. * * @mr: the region to be updated - * @addr: new address, relative to parent region + * @addr: new address, relative to container region */ void memory_region_set_address(MemoryRegion *mr, hwaddr addr); @@ -836,16 +869,24 @@ void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset); /** - * memory_region_present: checks if an address relative to a @parent - * translates into #MemoryRegion within @parent + * memory_region_present: checks if an address relative to a @container + * translates into #MemoryRegion within @container * - * Answer whether a #MemoryRegion within @parent covers the address + * Answer whether a #MemoryRegion within @container covers the address * @addr. * - * @parent: a #MemoryRegion within which @addr is a relative address - * @addr: the area within @parent to be searched + * @container: a #MemoryRegion within which @addr is a relative address + * @addr: the area within @container to be searched + */ +bool memory_region_present(MemoryRegion *container, hwaddr addr); + +/** + * memory_region_is_mapped: returns true if #MemoryRegion is mapped + * into any address space. 
+ * + * @mr: a #MemoryRegion which should be checked if it's mapped */ -bool memory_region_present(MemoryRegion *parent, hwaddr addr); +bool memory_region_is_mapped(MemoryRegion *mr); /** * memory_region_find: translate an address/size relative to a @@ -866,7 +907,7 @@ bool memory_region_present(MemoryRegion *parent, hwaddr addr); * Similarly, the .@offset_within_address_space is relative to the * address space that contains both regions, the passed and the * returned one. However, in the special case where the @mr argument - * has no parent (and thus is the root of the address space), the + * has no container (and thus is the root of the address space), the * following will hold: * .@offset_within_address_space >= @addr * .@offset_within_address_space + .@size <= @addr + @size diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index b94de02ea7..55ca67681f 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -22,9 +22,13 @@ #ifndef CONFIG_USER_ONLY #include "hw/xen/xen.h" +ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, + bool share, const char *mem_path, + Error **errp); ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr); ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr); +int qemu_get_ram_fd(ram_addr_t addr); void *qemu_get_ram_ptr(ram_addr_t addr); void qemu_ram_free(ram_addr_t addr); void qemu_ram_free_from_ptr(ram_addr_t addr); diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h index a9fae9d5c5..1f678b4bf2 100644 --- a/include/hw/acpi/acpi.h +++ b/include/hw/acpi/acpi.h @@ -26,6 +26,11 @@ #include "exec/memory.h" #include "hw/irq.h" +/* + * current device naming scheme supports up to 256 memory devices + */ +#define ACPI_MAX_RAM_SLOTS 256 + /* from linux include/acpi/actype.h */ /* Default ACPI register widths */ diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h new file mode 100644 index 0000000000..f245f8d236 --- 
/dev/null +++ b/include/hw/acpi/acpi_dev_interface.h @@ -0,0 +1,43 @@ +#ifndef ACPI_DEV_INTERFACE_H +#define ACPI_DEV_INTERFACE_H + +#include "qom/object.h" +#include "qapi-types.h" + +#define TYPE_ACPI_DEVICE_IF "acpi-device-interface" + +#define ACPI_DEVICE_IF_CLASS(klass) \ + OBJECT_CLASS_CHECK(AcpiDeviceIfClass, (klass), \ + TYPE_ACPI_DEVICE_IF) +#define ACPI_DEVICE_IF_GET_CLASS(obj) \ + OBJECT_GET_CLASS(AcpiDeviceIfClass, (obj), \ + TYPE_ACPI_DEVICE_IF) +#define ACPI_DEVICE_IF(obj) \ + INTERFACE_CHECK(AcpiDeviceIf, (obj), \ + TYPE_ACPI_DEVICE_IF) + + +typedef struct AcpiDeviceIf { + /* <private> */ + Object Parent; +} AcpiDeviceIf; + +/** + * AcpiDeviceIfClass: + * + * ospm_status: returns status of ACPI device objects, reported + * via _OST method if device supports it. + * + * Interface is designed for providing unified interface + * to generic ACPI functionality that could be used without + * knowledge about internals of actual device that implements + * ACPI interface. + */ +typedef struct AcpiDeviceIfClass { + /* <private> */ + InterfaceClass parent_class; + + /* <public> */ + void (*ospm_status)(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); +} AcpiDeviceIfClass; +#endif diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h index 4576400fd7..9e5d30c9df 100644 --- a/include/hw/acpi/cpu_hotplug.h +++ b/include/hw/acpi/cpu_hotplug.h @@ -13,7 +13,7 @@ #define ACPI_HOTPLUG_H #include "hw/acpi/acpi.h" -#include "hw/acpi/cpu_hotplug_defs.h" +#include "hw/acpi/pc-hotplug.h" typedef struct AcpiCpuHotplug { MemoryRegion io; diff --git a/include/hw/acpi/cpu_hotplug_defs.h b/include/hw/acpi/cpu_hotplug_defs.h deleted file mode 100644 index 9f33663511..0000000000 --- a/include/hw/acpi/cpu_hotplug_defs.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * QEMU ACPI hotplug utilities shared defines - * - * Copyright (C) 2013 Red Hat Inc - * - * Authors: - * Igor Mammedov <imammedo@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 
2 or later. - * See the COPYING file in the top-level directory. - */ -#ifndef ACPI_HOTPLUG_DEFS_H -#define ACPI_HOTPLUG_DEFS_H - -/* - * ONLY DEFINEs are permited in this file since it's shared - * between C and ASL code. - */ -#define ACPI_CPU_HOTPLUG_STATUS 4 - -/* Limit for CPU arch IDs for CPU hotplug. All hotpluggable CPUs should - * have CPUClass.get_arch_id() < ACPI_CPU_HOTPLUG_ID_LIMIT. - */ -#define ACPI_CPU_HOTPLUG_ID_LIMIT 256 - -/* 256 CPU IDs, 8 bits per entry: */ -#define ACPI_GPE_PROC_LEN 32 - -#define ICH9_CPU_HOTPLUG_IO_BASE 0x0CD8 -#define PIIX4_CPU_HOTPLUG_IO_BASE 0xaf00 - -#endif diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index 104f419852..7e42448ef9 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -23,6 +23,8 @@ #include "hw/acpi/acpi.h" #include "hw/acpi/cpu_hotplug.h" +#include "hw/acpi/memory_hotplug.h" +#include "hw/acpi/acpi_dev_interface.h" typedef struct ICH9LPCPMRegs { /* @@ -46,6 +48,8 @@ typedef struct ICH9LPCPMRegs { AcpiCpuHotplug gpe_cpu; Notifier cpu_added_notifier; + + MemHotplugState acpi_memory_hotplug; } ICH9LPCPMRegs; void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, @@ -55,4 +59,7 @@ extern const VMStateDescription vmstate_ich9_pm; void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp); +void ich9_pm_device_plug_cb(ICH9LPCPMRegs *pm, DeviceState *dev, Error **errp); + +void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); #endif /* HW_ACPI_ICH9_H */ diff --git a/include/hw/acpi/memory_hotplug.h b/include/hw/acpi/memory_hotplug.h new file mode 100644 index 0000000000..7bbf8a0064 --- /dev/null +++ b/include/hw/acpi/memory_hotplug.h @@ -0,0 +1,38 @@ +#ifndef QEMU_HW_ACPI_MEMORY_HOTPLUG_H +#define QEMU_HW_ACPI_MEMORY_HOTPLUG_H + +#include "hw/qdev-core.h" +#include "hw/acpi/acpi.h" +#include "migration/vmstate.h" + +#define ACPI_MEMORY_HOTPLUG_STATUS 8 + +typedef struct MemStatus { + DeviceState *dimm; + bool is_enabled; + bool is_inserting; + 
uint32_t ost_event; + uint32_t ost_status; +} MemStatus; + +typedef struct MemHotplugState { + bool is_enabled; /* true if memory hotplug is supported */ + MemoryRegion io; + uint32_t selector; + uint32_t dev_count; + MemStatus *devs; +} MemHotplugState; + +void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner, + MemHotplugState *state); + +void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st, + DeviceState *dev, Error **errp); + +extern const VMStateDescription vmstate_memory_hotplug; +#define VMSTATE_MEMORY_HOTPLUG(memhp, state) \ + VMSTATE_STRUCT(memhp, state, 1, \ + vmstate_memory_hotplug, MemHotplugState) + +void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list); +#endif diff --git a/include/hw/acpi/pc-hotplug.h b/include/hw/acpi/pc-hotplug.h new file mode 100644 index 0000000000..bf5157d7c3 --- /dev/null +++ b/include/hw/acpi/pc-hotplug.h @@ -0,0 +1,56 @@ +/* + * QEMU ACPI hotplug utilities shared defines + * + * Copyright (C) 2014 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef PC_HOTPLUG_H +#define PC_HOTPLUG_H + +/* + * ONLY DEFINEs are permitted in this file since it's shared + * between C and ASL code. + */ +#define ACPI_CPU_HOTPLUG_STATUS 4 + +/* Limit for CPU arch IDs for CPU hotplug. All hotpluggable CPUs should + * have CPUClass.get_arch_id() < ACPI_CPU_HOTPLUG_ID_LIMIT. 
+ */ +#define ACPI_CPU_HOTPLUG_ID_LIMIT 256 + +/* 256 CPU IDs, 8 bits per entry: */ +#define ACPI_GPE_PROC_LEN 32 + +#define ICH9_CPU_HOTPLUG_IO_BASE 0x0CD8 +#define PIIX4_CPU_HOTPLUG_IO_BASE 0xaf00 + +#define ACPI_MEMORY_HOTPLUG_IO_LEN 24 +#define ACPI_MEMORY_HOTPLUG_BASE 0x0a00 + +#define MEMORY_HOPTLUG_DEVICE MHPD +#define MEMORY_SLOTS_NUMBER MDNR +#define MEMORY_HOTPLUG_IO_REGION HPMR +#define MEMORY_SLOT_ADDR_LOW MRBL +#define MEMORY_SLOT_ADDR_HIGH MRBH +#define MEMORY_SLOT_SIZE_LOW MRLL +#define MEMORY_SLOT_SIZE_HIGH MRLH +#define MEMORY_SLOT_PROXIMITY MPX +#define MEMORY_SLOT_ENABLED MES +#define MEMORY_SLOT_INSERT_EVENT MINS +#define MEMORY_SLOT_SLECTOR MSEL +#define MEMORY_SLOT_OST_EVENT MOEV +#define MEMORY_SLOT_OST_STATUS MOSC +#define MEMORY_SLOT_LOCK MLCK +#define MEMORY_SLOT_STATUS_METHOD MRST +#define MEMORY_SLOT_CRS_METHOD MCRS +#define MEMORY_SLOT_OST_METHOD MOST +#define MEMORY_SLOT_PROXIMITY_METHOD MPXM +#define MEMORY_SLOT_NOTIFY_METHOD MTFY +#define MEMORY_SLOT_SCAN_METHOD MSCN + +#endif diff --git a/include/hw/boards.h b/include/hw/boards.h index 2d2e2bef19..605a970934 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -43,9 +43,13 @@ struct QEMUMachine { const char *hw_version; }; -#define TYPE_MACHINE_SUFFIX "-machine" +void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, + const char *name, + uint64_t ram_size); + int qemu_register_machine(QEMUMachine *m); +#define TYPE_MACHINE_SUFFIX "-machine" #define TYPE_MACHINE "machine" #undef MACHINE /* BSD defines it and QEMU does not use it */ #define MACHINE(obj) \ @@ -61,6 +65,11 @@ extern MachineState *current_machine; /** * MachineClass: * @qemu_machine: #QEMUMachine + * @get_hotplug_handler: this function is called during bus-less + * device hotplug. If defined it returns pointer to an instance + * of HotplugHandler object, which handles hotplug operation + * for a given @dev. 
It may return NULL if @dev doesn't require + * any actions to be performed by hotplug handler. */ struct MachineClass { /*< private >*/ @@ -90,6 +99,9 @@ struct MachineClass { const char *default_boot_order; GlobalProperty *compat_props; const char *hw_version; + + HotplugHandler *(*get_hotplug_handler)(MachineState *machine, + DeviceState *dev); }; /** @@ -113,6 +125,8 @@ struct MachineState { char *firmware; ram_addr_t ram_size; + ram_addr_t maxram_size; + uint64_t ram_slots; const char *boot_order; char *kernel_filename; char *kernel_cmdline; diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h index e19143555e..59ea25b49a 100644 --- a/include/hw/i386/ich9.h +++ b/include/hw/i386/ich9.h @@ -24,7 +24,7 @@ I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base); #define ICH9_CC_SIZE (16 * 1024) /* 16KB */ -#define TYPE_ICH9_LPC_DEVICE "ICH9 LPC" +#define TYPE_ICH9_LPC_DEVICE "ICH9-LPC" #define ICH9_LPC_DEVICE(obj) \ OBJECT_CHECK(ICH9LPCState, (obj), TYPE_ICH9_LPC_DEVICE) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index fa9d99792a..19f78ea336 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -3,6 +3,7 @@ #include "qemu-common.h" #include "exec/memory.h" +#include "hw/boards.h" #include "hw/isa/isa.h" #include "hw/block/fdc.h" #include "net/net.h" @@ -12,9 +13,57 @@ #include "qemu/bitmap.h" #include "sysemu/sysemu.h" #include "hw/pci/pci.h" +#include "hw/boards.h" #define HPET_INTCAP "hpet-intcap" +/** + * PCMachineState: + * @hotplug_memory_base: address in guest RAM address space where hotplug memory + * address space begins. 
+ * @hotplug_memory: hotplug memory address space container + * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling + */ +struct PCMachineState { + /*< private >*/ + MachineState parent_obj; + + /* <public> */ + ram_addr_t hotplug_memory_base; + MemoryRegion hotplug_memory; + + HotplugHandler *acpi_dev; +}; + +#define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" +#define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size" + +/** + * PCMachineClass: + * @get_hotplug_handler: pointer to parent class callback @get_hotplug_handler + */ +struct PCMachineClass { + /*< private >*/ + MachineClass parent_class; + + /*< public >*/ + HotplugHandler *(*get_hotplug_handler)(MachineState *machine, + DeviceState *dev); +}; + +typedef struct PCMachineState PCMachineState; +typedef struct PCMachineClass PCMachineClass; + +#define TYPE_PC_MACHINE "generic-pc-machine" +#define PC_MACHINE(obj) \ + OBJECT_CHECK(PCMachineState, (obj), TYPE_PC_MACHINE) +#define PC_MACHINE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(PCMachineClass, (obj), TYPE_PC_MACHINE) +#define PC_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(PCMachineClass, (klass), TYPE_PC_MACHINE) + +void qemu_register_pc_machine(QEMUMachine *m); + /* PC-style peripherals (also used by other machines). 
*/ typedef struct PcPciInfo { @@ -43,6 +92,7 @@ struct PcGuestInfo { uint64_t *node_cpu; FWCfgState *fw_cfg; bool has_acpi_build; + bool has_reserved_memory; }; /* parallel.c */ @@ -134,10 +184,8 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, MemoryRegion *pci_address_space); -FWCfgState *pc_memory_init(MemoryRegion *system_memory, - const char *kernel_filename, - const char *kernel_cmdline, - const char *initrd_filename, +FWCfgState *pc_memory_init(MachineState *machine, + MemoryRegion *system_memory, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, MemoryRegion *rom_memory, @@ -167,7 +215,8 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name); I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qemu_irq sci_irq, qemu_irq smi_irq, - int kvm_enabled, FWCfgState *fw_cfg); + int kvm_enabled, FWCfgState *fw_cfg, + DeviceState **piix4_pm); void piix4_smbus_register_device(SMBusDevice *dev, uint8_t addr); /* hpet.c */ @@ -243,7 +292,12 @@ int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_Q35_COMPAT_2_0 \ - PC_COMPAT_2_0 + PC_COMPAT_2_0, \ + {\ + .driver = "ICH9-LPC",\ + .property = "memory-hotplug-support",\ + .value = "off",\ + } #define PC_Q35_COMPAT_1_7 \ PC_COMPAT_1_7, \ @@ -268,10 +322,20 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_COMPAT_2_0 \ {\ + .driver = "virtio-scsi-pci",\ + .property = "any_layout",\ + .value = "off",\ + },{\ + .driver = "PIIX4_PM",\ + .property = "memory-hotplug-support",\ + .value = "off",\ + },\ + {\ .driver = "apic",\ .property = "version",\ .value = stringify(0x11),\ - },{\ + },\ + {\ .driver = "nec-usb-xhci",\ .property = "superspeed-ports-first",\ .value = "off",\ @@ -290,6 +354,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .driver = "pci-serial-4x",\ .property = "prog_if",\ .value = stringify(0),\ + 
},\ + {\ + .driver = "virtio-net-pci",\ + .property = "guest_announce",\ + .value = "off",\ } #define PC_COMPAT_1_7 \ diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h new file mode 100644 index 0000000000..761eeef801 --- /dev/null +++ b/include/hw/mem/pc-dimm.h @@ -0,0 +1,81 @@ +/* + * PC DIMM device + * + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Vasilis Liaskovitis <vasilis.liaskovitis@profitbricks.com> + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_PC_DIMM_H +#define QEMU_PC_DIMM_H + +#include "exec/memory.h" +#include "sysemu/hostmem.h" +#include "hw/qdev.h" + +#define DEFAULT_PC_DIMMSIZE (1024*1024*1024) + +#define TYPE_PC_DIMM "pc-dimm" +#define PC_DIMM(obj) \ + OBJECT_CHECK(PCDIMMDevice, (obj), TYPE_PC_DIMM) +#define PC_DIMM_CLASS(oc) \ + OBJECT_CLASS_CHECK(PCDIMMDeviceClass, (oc), TYPE_PC_DIMM) +#define PC_DIMM_GET_CLASS(obj) \ + OBJECT_GET_CLASS(PCDIMMDeviceClass, (obj), TYPE_PC_DIMM) + +#define PC_DIMM_ADDR_PROP "addr" +#define PC_DIMM_SLOT_PROP "slot" +#define PC_DIMM_NODE_PROP "node" +#define PC_DIMM_SIZE_PROP "size" +#define PC_DIMM_MEMDEV_PROP "memdev" + +#define PC_DIMM_UNASSIGNED_SLOT -1 + +/** + * PCDIMMDevice: + * @addr: starting guest physical address, where @PCDIMMDevice is mapped. + * Default value: 0, means that address is auto-allocated. + * @node: numa node to which @PCDIMMDevice is attached. + * @slot: slot number into which @PCDIMMDevice is plugged in. + * Default value: -1, means that slot is auto-allocated. 
+ * @hostmem: host memory backend providing memory for @PCDIMMDevice + */ +typedef struct PCDIMMDevice { + /* private */ + DeviceState parent_obj; + + /* public */ + uint64_t addr; + uint32_t node; + int32_t slot; + HostMemoryBackend *hostmem; +} PCDIMMDevice; + +/** + * PCDIMMDeviceClass: + * @get_memory_region: returns #MemoryRegion associated with @dimm + */ +typedef struct PCDIMMDeviceClass { + /* private */ + DeviceClass parent_class; + + /* public */ + MemoryRegion *(*get_memory_region)(PCDIMMDevice *dimm); +} PCDIMMDeviceClass; + +uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, + uint64_t address_space_size, + uint64_t *hint, uint64_t size, + Error **errp); + +int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); + +int qmp_pc_dimm_device_list(Object *obj, void *opaque); +#endif diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h new file mode 100644 index 0000000000..d31768a1d4 --- /dev/null +++ b/include/hw/virtio/vhost-backend.h @@ -0,0 +1,38 @@ +/* + * vhost-backend + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef VHOST_BACKEND_H_ +#define VHOST_BACKEND_H_ + +typedef enum VhostBackendType { + VHOST_BACKEND_TYPE_NONE = 0, + VHOST_BACKEND_TYPE_KERNEL = 1, + VHOST_BACKEND_TYPE_USER = 2, + VHOST_BACKEND_TYPE_MAX = 3, +} VhostBackendType; + +struct vhost_dev; + +typedef int (*vhost_call)(struct vhost_dev *dev, unsigned long int request, + void *arg); +typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque); +typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev); + +typedef struct VhostOps { + VhostBackendType backend_type; + vhost_call vhost_call; + vhost_backend_init vhost_backend_init; + vhost_backend_cleanup vhost_backend_cleanup; +} VhostOps; + +int vhost_set_backend_type(struct vhost_dev *dev, + VhostBackendType backend_type); + +#endif /* VHOST_BACKEND_H_ */ diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index de24746c7e..33028ec8c2 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -2,6 +2,7 @@ #define VHOST_H #include "hw/hw.h" +#include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio.h" #include "exec/memory.h" @@ -25,11 +26,11 @@ typedef unsigned long vhost_log_chunk_t; #define VHOST_LOG_PAGE 0x1000 #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) +#define VHOST_INVALID_FEATURE_BIT (0xff) struct vhost_memory; struct vhost_dev { MemoryListener memory_listener; - int control; struct vhost_memory *mem; int n_mem_sections; MemoryRegionSection *mem_sections; @@ -48,10 +49,12 @@ struct vhost_dev { bool memory_changed; hwaddr mem_changed_start_addr; hwaddr mem_changed_end_addr; + const VhostOps *vhost_ops; + void *opaque; }; -int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath, - bool force); +int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + VhostBackendType backend_type, bool force); void vhost_dev_cleanup(struct vhost_dev *hdev); bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice 
*vdev); int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); @@ -68,4 +71,8 @@ bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n); */ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, bool mask); +unsigned vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, + unsigned features); +void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, + unsigned features); #endif diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 4b32440837..f7fccc08a4 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -49,12 +49,14 @@ #define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */ #define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */ #define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ +#define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce itself */ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ +#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ #define TX_TIMER_INTERVAL 150000 /* 150 us */ @@ -193,6 +195,8 @@ typedef struct VirtIONet { char *netclient_name; char *netclient_type; uint64_t curr_guest_offloads; + QEMUTimer *announce_timer; + int announce_counter; } VirtIONet; #define VIRTIO_NET_CTRL_MAC 1 @@ -213,6 +217,18 @@ typedef struct VirtIONet { #define VIRTIO_NET_CTRL_VLAN_DEL 1 /* + * Control link announce acknowledgement + * + * VIRTIO_NET_S_ANNOUNCE bit in the status field requests link announcement from + * guest driver. The driver is notified by config space change interrupt. The + * command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that the driver has + * received the notification. It makes the device clear the bit + * VIRTIO_NET_S_ANNOUNCE in the status field. 
+ */ +#define VIRTIO_NET_CTRL_ANNOUNCE 3 + #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 + +/* * Control Multiqueue * * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET @@ -251,6 +267,7 @@ struct virtio_net_ctrl_mq { DEFINE_PROP_BIT("guest_tso6", _state, _field, VIRTIO_NET_F_GUEST_TSO6, true), \ DEFINE_PROP_BIT("guest_ecn", _state, _field, VIRTIO_NET_F_GUEST_ECN, true), \ DEFINE_PROP_BIT("guest_ufo", _state, _field, VIRTIO_NET_F_GUEST_UFO, true), \ + DEFINE_PROP_BIT("guest_announce", _state, _field, VIRTIO_NET_F_GUEST_ANNOUNCE, true), \ DEFINE_PROP_BIT("host_tso4", _state, _field, VIRTIO_NET_F_HOST_TSO4, true), \ DEFINE_PROP_BIT("host_tso6", _state, _field, VIRTIO_NET_F_HOST_TSO6, true), \ DEFINE_PROP_BIT("host_ecn", _state, _field, VIRTIO_NET_F_HOST_ECN, true), \ diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 42b102487a..a8f618578b 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -84,14 +84,13 @@ #define VIRTIO_SCSI_EVT_RESET_RESCAN 1 #define VIRTIO_SCSI_EVT_RESET_REMOVED 2 -/* SCSI command request, followed by data-out */ +/* SCSI command request, followed by CDB and data-out */ typedef struct { uint8_t lun[8]; /* Logical Unit Number */ uint64_t tag; /* Command identifier */ uint8_t task_attr; /* Task attribute */ uint8_t prio; uint8_t crn; - uint8_t cdb[]; } QEMU_PACKED VirtIOSCSICmdReq; /* Response, followed by sense data and data-in */ @@ -101,7 +100,6 @@ typedef struct { uint16_t status_qualifier; /* Status qualifier */ uint8_t status; /* Command completion status */ uint8_t response; /* Response values */ - uint8_t sense[]; } QEMU_PACKED VirtIOSCSICmdResp; /* Task Management Request */ @@ -186,7 +184,12 @@ typedef struct { DEFINE_PROP_BIT("param_change", _state, _feature_field, \ VIRTIO_SCSI_F_CHANGE, true) -void virtio_scsi_common_realize(DeviceState *dev, Error **errp); +typedef void (*HandleOutput)(VirtIODevice *, VirtQueue *); + +void virtio_scsi_common_realize(DeviceState *dev, Error **errp, 
+ HandleOutput ctrl, HandleOutput evt, + HandleOutput cmd); + void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp); #endif /* _QEMU_VIRTIO_SCSI_H */ diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 7e45048355..799d2d0f03 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -752,6 +752,8 @@ extern const VMStateInfo vmstate_info_bitmap; #define VMSTATE_END_OF_LIST() \ {} +#define SELF_ANNOUNCE_ROUNDS 5 + int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, int version_id); void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, @@ -778,4 +780,12 @@ void vmstate_register_ram(struct MemoryRegion *memory, DeviceState *dev); void vmstate_unregister_ram(struct MemoryRegion *memory, DeviceState *dev); void vmstate_register_ram_global(struct MemoryRegion *memory); +static inline +int64_t self_announce_delay(int round) +{ + assert(round < SELF_ANNOUNCE_ROUNDS && round > 0); + /* delay 50ms, 150ms, 250ms, ... */ + return 50 + (SELF_ANNOUNCE_ROUNDS - round - 1) * 100; +} + #endif diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h index 1c1f56f36b..97696ea693 100644 --- a/include/monitor/monitor.h +++ b/include/monitor/monitor.h @@ -51,6 +51,7 @@ typedef enum MonitorEvent { QEVENT_BLOCK_IMAGE_CORRUPTED, QEVENT_QUORUM_FAILURE, QEVENT_QUORUM_REPORT_BAD, + QEVENT_ACPI_OST, /* Add to 'monitor_event_names' array in monitor.c when * defining new events here */ diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h new file mode 100644 index 0000000000..85109f63aa --- /dev/null +++ b/include/net/vhost-user.h @@ -0,0 +1,17 @@ +/* + * vhost-user.h + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef VHOST_USER_H_ +#define VHOST_USER_H_ + +struct vhost_net; +struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc); + +#endif /* VHOST_USER_H_ */ diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h index 2d936bb5f5..b1c18a3f3b 100644 --- a/include/net/vhost_net.h +++ b/include/net/vhost_net.h @@ -2,11 +2,19 @@ #define VHOST_NET_H #include "net/net.h" +#include "hw/virtio/vhost-backend.h" struct vhost_net; typedef struct vhost_net VHostNetState; -VHostNetState *vhost_net_init(NetClientState *backend, int devfd, bool force); +typedef struct VhostNetOptions { + VhostBackendType backend_type; + NetClientState *net_backend; + void *opaque; + bool force; +} VhostNetOptions; + +struct vhost_net *vhost_net_init(VhostNetOptions *options); bool vhost_net_query(VHostNetState *net, VirtIODevice *dev); int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, int total_queues); @@ -20,4 +28,5 @@ void vhost_net_ack_features(VHostNetState *net, unsigned features); bool vhost_net_virtqueue_pending(VHostNetState *net, int n); void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, int idx, bool mask); +VHostNetState *get_vhost_net(NetClientState *nc); #endif diff --git a/include/qemu-common.h b/include/qemu-common.h index 66ceceb2ad..ae76197532 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -315,9 +315,9 @@ void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t soffset, size_t sbytes); -void qemu_iovec_concat_iov(QEMUIOVector *dst, - struct iovec *src_iov, unsigned int src_cnt, - size_t soffset, size_t sbytes); +size_t qemu_iovec_concat_iov(QEMUIOVector *dst, + struct iovec *src_iov, unsigned int src_cnt, + size_t soffset, size_t sbytes); bool qemu_iovec_is_zero(QEMUIOVector *qiov); void qemu_iovec_destroy(QEMUIOVector *qiov); void 
qemu_iovec_reset(QEMUIOVector *qiov); diff --git a/include/qemu/aes.h b/include/qemu/aes.h index c10666059f..a006da2224 100644 --- a/include/qemu/aes.h +++ b/include/qemu/aes.h @@ -10,6 +10,15 @@ struct aes_key_st { }; typedef struct aes_key_st AES_KEY; +/* FreeBSD has its own AES_set_decrypt_key in -lcrypto, avoid conflicts */ +#ifdef __FreeBSD__ +#define AES_set_encrypt_key QEMU_AES_set_encrypt_key +#define AES_set_decrypt_key QEMU_AES_set_decrypt_key +#define AES_encrypt QEMU_AES_encrypt +#define AES_decrypt QEMU_AES_decrypt +#define AES_cbc_encrypt QEMU_AES_cbc_encrypt +#endif + int AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); int AES_set_decrypt_key(const unsigned char *userKey, const int bits, diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h index 340b1e73bd..7e2d5c996e 100644 --- a/include/qemu/bitops.h +++ b/include/qemu/bitops.h @@ -157,7 +157,17 @@ unsigned long find_next_zero_bit(const unsigned long *addr, static inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { - return find_next_bit(addr, size, 0); + unsigned long result, tmp; + + for (result = 0; result < size; result += BITS_PER_LONG) { + tmp = *addr++; + if (tmp) { + result += ctzl(tmp); + return result < size ? 
result : size; + } + } + /* Not found */ + return size; } /** diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index ffb296692d..6d35c1bcba 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -116,6 +116,16 @@ void qemu_anon_ram_free(void *ptr, size_t size); #else #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID #endif +#ifdef MADV_UNMERGEABLE +#define QEMU_MADV_UNMERGEABLE MADV_UNMERGEABLE +#else +#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID +#endif +#ifdef MADV_DODUMP +#define QEMU_MADV_DODUMP MADV_DODUMP +#else +#define QEMU_MADV_DODUMP QEMU_MADV_INVALID +#endif #ifdef MADV_DONTDUMP #define QEMU_MADV_DONTDUMP MADV_DONTDUMP #else @@ -133,6 +143,8 @@ void qemu_anon_ram_free(void *ptr, size_t size); #define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED #define QEMU_MADV_DONTFORK QEMU_MADV_INVALID #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_DODUMP QEMU_MADV_INVALID #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID #define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID @@ -142,6 +154,8 @@ void qemu_anon_ram_free(void *ptr, size_t size); #define QEMU_MADV_DONTNEED QEMU_MADV_INVALID #define QEMU_MADV_DONTFORK QEMU_MADV_INVALID #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID +#define QEMU_MADV_DODUMP QEMU_MADV_INVALID #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID #define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID @@ -251,4 +265,6 @@ void qemu_init_auxval(char **envp); void qemu_set_tty_echo(int fd, bool echo); +void os_mem_prealloc(int fd, char *area, size_t sz); + #endif diff --git a/include/qemu/range.h b/include/qemu/range.h index aae9720161..cfa021fd48 100644 --- a/include/qemu/range.h +++ b/include/qemu/range.h @@ -3,6 +3,7 @@ #include <inttypes.h> #include <qemu/typedefs.h> +#include "qemu/queue.h" /* * Operations on 64 bit address ranges. 
@@ -60,4 +61,75 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1, return !(last2 < first1 || last1 < first2); } +/* 0,1 can merge with 1,2 but don't overlap */ +static inline bool ranges_can_merge(Range *range1, Range *range2) +{ + return !(range1->end < range2->begin || range2->end < range1->begin); +} + +static inline int range_merge(Range *range1, Range *range2) +{ + if (ranges_can_merge(range1, range2)) { + if (range1->end < range2->end) { + range1->end = range2->end; + } + if (range1->begin > range2->begin) { + range1->begin = range2->begin; + } + return 0; + } + + return -1; +} + +static inline GList *g_list_insert_sorted_merged(GList *list, + gpointer data, + GCompareFunc func) +{ + GList *l, *next = NULL; + Range *r, *nextr; + + if (!list) { + list = g_list_insert_sorted(list, data, func); + return list; + } + + nextr = data; + l = list; + while (l && l != next && nextr) { + r = l->data; + if (ranges_can_merge(r, nextr)) { + range_merge(r, nextr); + l = g_list_remove_link(l, next); + next = g_list_next(l); + if (next) { + nextr = next->data; + } else { + nextr = NULL; + } + } else { + l = g_list_next(l); + } + } + + if (!l) { + list = g_list_insert_sorted(list, data, func); + } + + return list; +} + +static inline gint range_compare(gconstpointer a, gconstpointer b) +{ + Range *ra = (Range *)a, *rb = (Range *)b; + if (ra->begin == rb->begin && ra->end == rb->end) { + return 0; + } else if (range_get_last(ra->begin, ra->end) < + range_get_last(rb->begin, rb->end)) { + return -1; + } else { + return 1; + } +} + #endif diff --git a/include/qom/object.h b/include/qom/object.h index a641dcde10..b882ccc85f 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -917,6 +917,34 @@ int64_t object_property_get_int(Object *obj, const char *name, Error **errp); /** + * object_property_get_enum: + * @obj: the object + * @name: the name of the property + * @strings: strings corresponding to enums + * @errp: returns an error if this function 
fails + * + * Returns: the value of the property, converted to an integer, or + * undefined if an error occurs (including when the property value is not + * an enum). + */ +int object_property_get_enum(Object *obj, const char *name, + const char *strings[], Error **errp); + +/** + * object_property_get_uint16List: + * @obj: the object + * @name: the name of the property + * @list: the returned int list + * @errp: returns an error if this function fails + * + * Returns: the value of the property, converted to integers, or + * undefined if an error occurs (including when the property value is not + * a list of integers). + */ +void object_property_get_uint16List(Object *obj, const char *name, + uint16List **list, Error **errp); + +/** * object_property_set: * @obj: the object * @v: the visitor that will be used to write the property value. This should diff --git a/include/sysemu/char.h b/include/sysemu/char.h index 7f5eeb38b0..3b835f6fb3 100644 --- a/include/sysemu/char.h +++ b/include/sysemu/char.h @@ -56,10 +56,13 @@ typedef void IOEventHandler(void *opaque, int event); struct CharDriverState { void (*init)(struct CharDriverState *s); int (*chr_write)(struct CharDriverState *s, const uint8_t *buf, int len); + int (*chr_sync_read)(struct CharDriverState *s, + const uint8_t *buf, int len); GSource *(*chr_add_watch)(struct CharDriverState *s, GIOCondition cond); void (*chr_update_read_handler)(struct CharDriverState *s); int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg); - int (*get_msgfd)(struct CharDriverState *s); + int (*get_msgfds)(struct CharDriverState *s, int* fds, int num); + int (*set_msgfds)(struct CharDriverState *s, int *fds, int num); int (*chr_add_client)(struct CharDriverState *chr, int fd); IOEventHandler *chr_event; IOCanReadHandler *chr_can_read; @@ -80,6 +83,7 @@ struct CharDriverState { int avail_connections; int is_mux; guint fd_in_tag; + guint fd_hup_tag; QemuOpts *opts; QTAILQ_ENTRY(CharDriverState) next; }; @@ -189,6 +193,18 @@
int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len); int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len); /** + * @qemu_chr_fe_read_all: + * + * Read data to a buffer from the back end. + * + * @buf the data buffer + * @len the number of bytes to read + * + * Returns: the number of bytes read + */ +int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len); + +/** * @qemu_chr_fe_ioctl: * * Issue a device specific ioctl to a backend. @@ -215,6 +231,32 @@ int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg); int qemu_chr_fe_get_msgfd(CharDriverState *s); /** + * @qemu_chr_fe_get_msgfds: + * + * For backends capable of fd passing, return the number of received file + * descriptors and fill the fds array with up to num elements + * + * Returns: -1 if fd passing isn't supported or there are no pending file + * descriptors. If file descriptors are returned, subsequent calls to + * this function will return -1 until a client sends a new set of file + * descriptors. + */ +int qemu_chr_fe_get_msgfds(CharDriverState *s, int *fds, int num); + +/** + * @qemu_chr_fe_set_msgfds: + * + * For backends capable of fd passing, set an array of fds to be passed with + * the next send operation. + * A subsequent call to this function before calling a write function will + * result in overwriting the fd array with the new value without being sent. + * Upon writing the message the fd array is freed. + * + * Returns: -1 if fd passing isn't supported.
+ */ +int qemu_chr_fe_set_msgfds(CharDriverState *s, int *fds, int num); + +/** * @qemu_chr_fe_claim: * * Claim a backend before using it, should be called before calling diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 6502488a05..4f790810bf 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -23,7 +23,6 @@ extern int smp_threads; #define smp_threads 1 #endif -void set_numa_modes(void); void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg); #endif diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h new file mode 100644 index 0000000000..1ce439415d --- /dev/null +++ b/include/sysemu/hostmem.h @@ -0,0 +1,68 @@ +/* + * QEMU Host Memory Backend + * + * Copyright (C) 2013-2014 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_RAM_H +#define QEMU_RAM_H + +#include "sysemu/sysemu.h" /* for MAX_NODES */ +#include "qom/object.h" +#include "qapi/error.h" +#include "exec/memory.h" +#include "qemu/option.h" +#include "qemu/bitmap.h" + +#define TYPE_MEMORY_BACKEND "memory-backend" +#define MEMORY_BACKEND(obj) \ + OBJECT_CHECK(HostMemoryBackend, (obj), TYPE_MEMORY_BACKEND) +#define MEMORY_BACKEND_GET_CLASS(obj) \ + OBJECT_GET_CLASS(HostMemoryBackendClass, (obj), TYPE_MEMORY_BACKEND) +#define MEMORY_BACKEND_CLASS(klass) \ + OBJECT_CLASS_CHECK(HostMemoryBackendClass, (klass), TYPE_MEMORY_BACKEND) + +typedef struct HostMemoryBackend HostMemoryBackend; +typedef struct HostMemoryBackendClass HostMemoryBackendClass; + +/** + * HostMemoryBackendClass: + * @parent_class: opaque parent class container + */ +struct HostMemoryBackendClass { + ObjectClass parent_class; + + void (*alloc)(HostMemoryBackend *backend, Error **errp); +}; + +/** + * @HostMemoryBackend + * + * @parent: opaque parent object container + * @size: amount of memory backend provides + * 
@id: unique identification string in memdev namespace + * @mr: MemoryRegion representing host memory belonging to backend + */ +struct HostMemoryBackend { + /* private */ + Object parent; + + /* protected */ + uint64_t size; + bool merge, dump; + bool prealloc, force_prealloc; + DECLARE_BITMAP(host_nodes, MAX_NODES + 1); + HostMemPolicy policy; + + MemoryRegion mr; +}; + +MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend, + Error **errp); + +#endif diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index de4bdaa40e..174ea36afa 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -43,6 +43,7 @@ extern bool kvm_allowed; extern bool kvm_kernel_irqchip; extern bool kvm_async_interrupts_allowed; extern bool kvm_halt_in_kernel_allowed; +extern bool kvm_eventfds_allowed; extern bool kvm_irqfds_allowed; extern bool kvm_msi_via_irqfd_allowed; extern bool kvm_gsi_routing_allowed; @@ -83,6 +84,15 @@ extern bool kvm_readonly_mem_allowed; #define kvm_halt_in_kernel() (kvm_halt_in_kernel_allowed) /** + * kvm_eventfds_enabled: + * + * Returns: true if we can use eventfds to receive notifications + * from a KVM CPU (ie the kernel supports eventfds and we are running + * with a configuration where it is meaningful to use them).
+ */ +#define kvm_eventfds_enabled() (kvm_eventfds_allowed) + +/** * kvm_irqfds_enabled: * * Returns: true if we can use irqfds to inject interrupts into @@ -128,6 +138,7 @@ extern bool kvm_readonly_mem_allowed; #define kvm_irqchip_in_kernel() (false) #define kvm_async_interrupts_enabled() (false) #define kvm_halt_in_kernel() (false) +#define kvm_eventfds_enabled() (false) #define kvm_irqfds_enabled() (false) #define kvm_msi_via_irqfd_enabled() (false) #define kvm_gsi_routing_allowed() (false) diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index bf8523ada1..af3fbc47d8 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -89,6 +89,8 @@ static inline void os_setup_post(void) {} void os_set_line_buffering(void); static inline void os_set_proc_name(const char *dummy) {} +size_t getpagesize(void); + #if !defined(EPROTONOSUPPORT) # define EPROTONOSUPPORT EINVAL #endif diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index ba5c7f8093..277230db49 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -9,6 +9,8 @@ #include "qapi-types.h" #include "qemu/notify.h" #include "qemu/main-loop.h" +#include "qemu/bitmap.h" +#include "qom/object.h" /* vl.c */ @@ -131,8 +133,10 @@ extern uint8_t *boot_splash_filedata; extern size_t boot_splash_filedata_size; extern uint8_t qemu_extra_params_fw[2]; extern QEMUClockType rtc_clock; +extern const char *mem_path; +extern int mem_prealloc; -#define MAX_NODES 64 +#define MAX_NODES 128 /* The following shall be true for all CPUs: * cpu->cpu_index < max_cpus <= MAX_CPUMASK_BITS @@ -142,8 +146,16 @@ extern QEMUClockType rtc_clock; #define MAX_CPUMASK_BITS 255 extern int nb_numa_nodes; -extern uint64_t node_mem[MAX_NODES]; -extern unsigned long *node_cpumask[MAX_NODES]; +typedef struct node_info { + uint64_t node_mem; + DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); + struct HostMemoryBackend *node_memdev; +} NodeInfo; +extern NodeInfo numa_info[MAX_NODES]; +void 
set_numa_nodes(void); +void set_numa_modes(void); +extern QemuOptsList qemu_numa_opts; +int numa_init_func(QemuOpts *opts, void *opaque); #define MAX_OPTION_ROMS 16 typedef struct QEMUOptionRom { @@ -114,6 +114,7 @@ KVMState *kvm_state; bool kvm_kernel_irqchip; bool kvm_async_interrupts_allowed; bool kvm_halt_in_kernel_allowed; +bool kvm_eventfds_allowed; bool kvm_irqfds_allowed; bool kvm_msi_via_irqfd_allowed; bool kvm_gsi_routing_allowed; @@ -1544,6 +1545,9 @@ int kvm_init(MachineClass *mc) (kvm_check_extension(s, KVM_CAP_READONLY_MEM) > 0); #endif + kvm_eventfds_allowed = + (kvm_check_extension(s, KVM_CAP_IOEVENTFD) > 0); + ret = kvm_arch_init(s); if (ret < 0) { goto err; @@ -1759,6 +1763,22 @@ int kvm_cpu_exec(CPUState *cpu) case KVM_EXIT_INTERNAL_ERROR: ret = kvm_handle_internal_error(cpu, run); break; + case KVM_EXIT_SYSTEM_EVENT: + switch (run->system_event.type) { + case KVM_SYSTEM_EVENT_SHUTDOWN: + qemu_system_shutdown_request(); + ret = EXCP_INTERRUPT; + break; + case KVM_SYSTEM_EVENT_RESET: + qemu_system_reset_request(); + ret = EXCP_INTERRUPT; + break; + default: + DPRINTF("kvm_arch_handle_exit\n"); + ret = kvm_arch_handle_exit(cpu, run); + break; + } + break; default: DPRINTF("kvm_arch_handle_exit\n"); ret = kvm_arch_handle_exit(cpu, run); diff --git a/kvm-stub.c b/kvm-stub.c index ac33d8666d..8e7737caa9 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -22,6 +22,7 @@ KVMState *kvm_state; bool kvm_kernel_irqchip; bool kvm_async_interrupts_allowed; +bool kvm_eventfds_allowed; bool kvm_irqfds_allowed; bool kvm_msi_via_irqfd_allowed; bool kvm_gsi_routing_allowed; diff --git a/linux-user/elfload.c b/linux-user/elfload.c index c123244ecd..1248eda272 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1292,7 +1292,6 @@ static bool elf_check_ehdr(struct elfhdr *ehdr) return (elf_check_arch(ehdr->e_machine) && ehdr->e_ehsize == sizeof(struct elfhdr) && ehdr->e_phentsize == sizeof(struct elf_phdr) - && ehdr->e_shentsize == sizeof(struct elf_shdr) && 
(ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN)); } diff --git a/linux-user/mips/target_cpu.h b/linux-user/mips/target_cpu.h index ba8e9eb1f9..19b8855000 100644 --- a/linux-user/mips/target_cpu.h +++ b/linux-user/mips/target_cpu.h @@ -30,7 +30,7 @@ static inline void cpu_clone_regs(CPUMIPSState *env, target_ulong newsp) static inline void cpu_set_tls(CPUMIPSState *env, target_ulong newtls) { - env->tls_value = newtls; + env->active_tc.CP0_UserLocal = newtls; } #endif diff --git a/linux-user/qemu.h b/linux-user/qemu.h index ba3d8ab378..8012cc2f5b 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -299,7 +299,7 @@ static inline int access_ok(int type, abi_ulong addr, abi_ulong size) __builtin_choose_expr(sizeof(*(hptr)) == 2, stw_##e##_p, \ __builtin_choose_expr(sizeof(*(hptr)) == 4, stl_##e##_p, \ __builtin_choose_expr(sizeof(*(hptr)) == 8, stq_##e##_p, abort)))) \ - ((hptr), (x)), 0) + ((hptr), (x)), (void)0) #define __get_user_e(x, hptr, e) \ ((x) = (typeof(*hptr))( \ @@ -307,7 +307,7 @@ static inline int access_ok(int type, abi_ulong addr, abi_ulong size) __builtin_choose_expr(sizeof(*(hptr)) == 2, lduw_##e##_p, \ __builtin_choose_expr(sizeof(*(hptr)) == 4, ldl_##e##_p, \ __builtin_choose_expr(sizeof(*(hptr)) == 8, ldq_##e##_p, abort)))) \ - (hptr)), 0) + (hptr)), (void)0) #ifdef TARGET_WORDS_BIGENDIAN # define __put_user(x, hptr) __put_user_e(x, hptr, be) @@ -326,9 +326,9 @@ static inline int access_ok(int type, abi_ulong addr, abi_ulong size) ({ \ abi_ulong __gaddr = (gaddr); \ target_type *__hptr; \ - abi_long __ret; \ + abi_long __ret = 0; \ if ((__hptr = lock_user(VERIFY_WRITE, __gaddr, sizeof(target_type), 0))) { \ - __ret = __put_user((x), __hptr); \ + __put_user((x), __hptr); \ unlock_user(__hptr, __gaddr, sizeof(target_type)); \ } else \ __ret = -TARGET_EFAULT; \ @@ -339,9 +339,9 @@ static inline int access_ok(int type, abi_ulong addr, abi_ulong size) ({ \ abi_ulong __gaddr = (gaddr); \ target_type *__hptr; \ - abi_long __ret; \ + abi_long 
__ret = 0; \ if ((__hptr = lock_user(VERIFY_READ, __gaddr, sizeof(target_type), 1))) { \ - __ret = __get_user((x), __hptr); \ + __get_user((x), __hptr); \ unlock_user(__hptr, __gaddr, 0); \ } else { \ /* avoid warning */ \ diff --git a/linux-user/signal.c b/linux-user/signal.c index 5b8a01f998..f3b43787fd 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -619,11 +619,12 @@ abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp) struct target_sigaltstack ss; ret = -TARGET_EFAULT; - if (!lock_user_struct(VERIFY_READ, uss, uss_addr, 1) - || __get_user(ss.ss_sp, &uss->ss_sp) - || __get_user(ss.ss_size, &uss->ss_size) - || __get_user(ss.ss_flags, &uss->ss_flags)) + if (!lock_user_struct(VERIFY_READ, uss, uss_addr, 1)) { goto out; + } + __get_user(ss.ss_sp, &uss->ss_sp); + __get_user(ss.ss_size, &uss->ss_size); + __get_user(ss.ss_flags, &uss->ss_flags); unlock_user_struct(uss, uss_addr, 0); ret = -TARGET_EPERM; @@ -721,11 +722,10 @@ int do_sigaction(int sig, const struct target_sigaction *act, return ret; } -static inline int copy_siginfo_to_user(target_siginfo_t *tinfo, +static inline void copy_siginfo_to_user(target_siginfo_t *tinfo, const target_siginfo_t *info) { tswap_siginfo(tinfo, info); - return 0; } static inline int current_exec_domain_sig(int sig) @@ -837,45 +837,43 @@ struct rt_sigframe */ /* XXX: save x87 state */ -static int -setup_sigcontext(struct target_sigcontext *sc, struct target_fpstate *fpstate, - CPUX86State *env, abi_ulong mask, abi_ulong fpstate_addr) +static void setup_sigcontext(struct target_sigcontext *sc, + struct target_fpstate *fpstate, CPUX86State *env, abi_ulong mask, + abi_ulong fpstate_addr) { CPUState *cs = CPU(x86_env_get_cpu(env)); - int err = 0; uint16_t magic; /* already locked in setup_frame() */ - err |= __put_user(env->segs[R_GS].selector, (unsigned int *)&sc->gs); - err |= __put_user(env->segs[R_FS].selector, (unsigned int *)&sc->fs); - err |= __put_user(env->segs[R_ES].selector, (unsigned 
int *)&sc->es); - err |= __put_user(env->segs[R_DS].selector, (unsigned int *)&sc->ds); - err |= __put_user(env->regs[R_EDI], &sc->edi); - err |= __put_user(env->regs[R_ESI], &sc->esi); - err |= __put_user(env->regs[R_EBP], &sc->ebp); - err |= __put_user(env->regs[R_ESP], &sc->esp); - err |= __put_user(env->regs[R_EBX], &sc->ebx); - err |= __put_user(env->regs[R_EDX], &sc->edx); - err |= __put_user(env->regs[R_ECX], &sc->ecx); - err |= __put_user(env->regs[R_EAX], &sc->eax); - err |= __put_user(cs->exception_index, &sc->trapno); - err |= __put_user(env->error_code, &sc->err); - err |= __put_user(env->eip, &sc->eip); - err |= __put_user(env->segs[R_CS].selector, (unsigned int *)&sc->cs); - err |= __put_user(env->eflags, &sc->eflags); - err |= __put_user(env->regs[R_ESP], &sc->esp_at_signal); - err |= __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss); + __put_user(env->segs[R_GS].selector, (unsigned int *)&sc->gs); + __put_user(env->segs[R_FS].selector, (unsigned int *)&sc->fs); + __put_user(env->segs[R_ES].selector, (unsigned int *)&sc->es); + __put_user(env->segs[R_DS].selector, (unsigned int *)&sc->ds); + __put_user(env->regs[R_EDI], &sc->edi); + __put_user(env->regs[R_ESI], &sc->esi); + __put_user(env->regs[R_EBP], &sc->ebp); + __put_user(env->regs[R_ESP], &sc->esp); + __put_user(env->regs[R_EBX], &sc->ebx); + __put_user(env->regs[R_EDX], &sc->edx); + __put_user(env->regs[R_ECX], &sc->ecx); + __put_user(env->regs[R_EAX], &sc->eax); + __put_user(cs->exception_index, &sc->trapno); + __put_user(env->error_code, &sc->err); + __put_user(env->eip, &sc->eip); + __put_user(env->segs[R_CS].selector, (unsigned int *)&sc->cs); + __put_user(env->eflags, &sc->eflags); + __put_user(env->regs[R_ESP], &sc->esp_at_signal); + __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss); cpu_x86_fsave(env, fpstate_addr, 1); fpstate->status = fpstate->sw; magic = 0xffff; - err |= __put_user(magic, &fpstate->magic); - err |= __put_user(fpstate_addr, &sc->fpstate); + 
__put_user(magic, &fpstate->magic); + __put_user(fpstate_addr, &sc->fpstate); /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(env->cr[2], &sc->cr2); - return err; + __put_user(mask, &sc->oldmask); + __put_user(env->cr[2], &sc->cr2); } /* @@ -911,47 +909,40 @@ static void setup_frame(int sig, struct target_sigaction *ka, { abi_ulong frame_addr; struct sigframe *frame; - int i, err = 0; + int i; frame_addr = get_sigframe(ka, env, sizeof(*frame)); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) goto give_sigsegv; - err |= __put_user(current_exec_domain_sig(sig), - &frame->sig); - if (err) - goto give_sigsegv; + __put_user(current_exec_domain_sig(sig), + &frame->sig); setup_sigcontext(&frame->sc, &frame->fpstate, env, set->sig[0], frame_addr + offsetof(struct sigframe, fpstate)); - if (err) - goto give_sigsegv; - for(i = 1; i < TARGET_NSIG_WORDS; i++) { - if (__put_user(set->sig[i], &frame->extramask[i - 1])) - goto give_sigsegv; - } + for(i = 1; i < TARGET_NSIG_WORDS; i++) { + __put_user(set->sig[i], &frame->extramask[i - 1]); + } /* Set up to return from userspace. If provided, use a stub already in userspace. 
*/ if (ka->sa_flags & TARGET_SA_RESTORER) { - err |= __put_user(ka->sa_restorer, &frame->pretcode); + __put_user(ka->sa_restorer, &frame->pretcode); } else { uint16_t val16; abi_ulong retcode_addr; retcode_addr = frame_addr + offsetof(struct sigframe, retcode); - err |= __put_user(retcode_addr, &frame->pretcode); + __put_user(retcode_addr, &frame->pretcode); /* This is popl %eax ; movl $,%eax ; int $0x80 */ val16 = 0xb858; - err |= __put_user(val16, (uint16_t *)(frame->retcode+0)); - err |= __put_user(TARGET_NR_sigreturn, (int *)(frame->retcode+2)); + __put_user(val16, (uint16_t *)(frame->retcode+0)); + __put_user(TARGET_NR_sigreturn, (int *)(frame->retcode+2)); val16 = 0x80cd; - err |= __put_user(val16, (uint16_t *)(frame->retcode+6)); + __put_user(val16, (uint16_t *)(frame->retcode+6)); } - if (err) - goto give_sigsegv; /* Set up registers for signal handler */ env->regs[R_ESP] = frame_addr; @@ -968,7 +959,6 @@ static void setup_frame(int sig, struct target_sigaction *ka, return; give_sigsegv: - unlock_user_struct(frame, frame_addr, 1); if (sig == TARGET_SIGSEGV) ka->_sa_handler = TARGET_SIG_DFL; force_sig(TARGET_SIGSEGV /* , current */); @@ -981,58 +971,50 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, { abi_ulong frame_addr, addr; struct rt_sigframe *frame; - int i, err = 0; + int i; frame_addr = get_sigframe(ka, env, sizeof(*frame)); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) goto give_sigsegv; - err |= __put_user(current_exec_domain_sig(sig), - &frame->sig); + __put_user(current_exec_domain_sig(sig), &frame->sig); addr = frame_addr + offsetof(struct rt_sigframe, info); - err |= __put_user(addr, &frame->pinfo); + __put_user(addr, &frame->pinfo); addr = frame_addr + offsetof(struct rt_sigframe, uc); - err |= __put_user(addr, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); - if (err) - goto give_sigsegv; + __put_user(addr, &frame->puc); + copy_siginfo_to_user(&frame->info, info); /* Create the ucontext. 
*/ - err |= __put_user(0, &frame->uc.tuc_flags); - err |= __put_user(0, &frame->uc.tuc_link); - err |= __put_user(target_sigaltstack_used.ss_sp, - &frame->uc.tuc_stack.ss_sp); - err |= __put_user(sas_ss_flags(get_sp_from_cpustate(env)), - &frame->uc.tuc_stack.ss_flags); - err |= __put_user(target_sigaltstack_used.ss_size, - &frame->uc.tuc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.tuc_mcontext, &frame->fpstate, - env, set->sig[0], - frame_addr + offsetof(struct rt_sigframe, fpstate)); - for(i = 0; i < TARGET_NSIG_WORDS; i++) { - if (__put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i])) - goto give_sigsegv; - } + __put_user(0, &frame->uc.tuc_flags); + __put_user(0, &frame->uc.tuc_link); + __put_user(target_sigaltstack_used.ss_sp, &frame->uc.tuc_stack.ss_sp); + __put_user(sas_ss_flags(get_sp_from_cpustate(env)), + &frame->uc.tuc_stack.ss_flags); + __put_user(target_sigaltstack_used.ss_size, + &frame->uc.tuc_stack.ss_size); + setup_sigcontext(&frame->uc.tuc_mcontext, &frame->fpstate, env, + set->sig[0], frame_addr + offsetof(struct rt_sigframe, fpstate)); + + for(i = 0; i < TARGET_NSIG_WORDS; i++) { + __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); + } /* Set up to return from userspace. If provided, use a stub already in userspace. 
*/ if (ka->sa_flags & TARGET_SA_RESTORER) { - err |= __put_user(ka->sa_restorer, &frame->pretcode); + __put_user(ka->sa_restorer, &frame->pretcode); } else { uint16_t val16; addr = frame_addr + offsetof(struct rt_sigframe, retcode); - err |= __put_user(addr, &frame->pretcode); + __put_user(addr, &frame->pretcode); /* This is movl $,%eax ; int $0x80 */ - err |= __put_user(0xb8, (char *)(frame->retcode+0)); - err |= __put_user(TARGET_NR_rt_sigreturn, (int *)(frame->retcode+1)); + __put_user(0xb8, (char *)(frame->retcode+0)); + __put_user(TARGET_NR_rt_sigreturn, (int *)(frame->retcode+1)); val16 = 0x80cd; - err |= __put_user(val16, (uint16_t *)(frame->retcode+5)); + __put_user(val16, (uint16_t *)(frame->retcode+5)); } - if (err) - goto give_sigsegv; - /* Set up registers for signal handler */ env->regs[R_ESP] = frame_addr; env->eip = ka->_sa_handler; @@ -1048,7 +1030,6 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, return; give_sigsegv: - unlock_user_struct(frame, frame_addr, 1); if (sig == TARGET_SIGSEGV) ka->_sa_handler = TARGET_SIG_DFL; force_sig(TARGET_SIGSEGV /* , current */); @@ -1110,11 +1091,9 @@ long do_sigreturn(CPUX86State *env) if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) goto badframe; /* set blocked signals */ - if (__get_user(target_set.sig[0], &frame->sc.oldmask)) - goto badframe; + __get_user(target_set.sig[0], &frame->sc.oldmask); for(i = 1; i < TARGET_NSIG_WORDS; i++) { - if (__get_user(target_set.sig[i], &frame->extramask[i - 1])) - goto badframe; + __get_user(target_set.sig[i], &frame->extramask[i - 1]); } target_to_host_sigset_internal(&set, &target_set); @@ -1371,9 +1350,7 @@ static void target_setup_frame(int usig, struct target_sigaction *ka, env->pc = ka->_sa_handler; env->xregs[30] = return_addr; if (info) { - if (copy_siginfo_to_user(&frame->info, info)) { - goto give_sigsegv; - } + copy_siginfo_to_user(&frame->info, info); env->xregs[1] = frame_addr + offsetof(struct target_rt_sigframe, info); 
env->xregs[2] = frame_addr + offsetof(struct target_rt_sigframe, uc); } @@ -1564,8 +1541,6 @@ static const abi_ulong retcodes[4] = { }; -#define __get_user_error(x,p,e) __get_user(x, p) - static inline int valid_user_regs(CPUARMState *regs) { return 1; @@ -1617,7 +1592,7 @@ get_sigframe(struct target_sigaction *ka, CPUARMState *regs, int framesize) return (sp - framesize) & ~7; } -static int +static void setup_return(CPUARMState *env, struct target_sigaction *ka, abi_ulong *rc, abi_ulong frame_addr, int usig, abi_ulong rc_addr) { @@ -1641,8 +1616,7 @@ setup_return(CPUARMState *env, struct target_sigaction *ka, if (ka->sa_flags & TARGET_SA_SIGINFO) idx += 2; - if (__put_user(retcodes[idx], rc)) - return 1; + __put_user(retcodes[idx], rc); retcode = rc_addr + thumb; } @@ -1652,8 +1626,6 @@ setup_return(CPUARMState *env, struct target_sigaction *ka, env->regs[14] = retcode; env->regs[15] = handler & (thumb ? ~1 : ~3); cpsr_write(env, cpsr, 0xffffffff); - - return 0; } static abi_ulong *setup_sigframe_v2_vfp(abi_ulong *regspace, CPUARMState *env) @@ -1740,15 +1712,13 @@ static void setup_frame_v1(int usig, struct target_sigaction *ka, setup_sigcontext(&frame->sc, regs, set->sig[0]); - for(i = 1; i < TARGET_NSIG_WORDS; i++) { - if (__put_user(set->sig[i], &frame->extramask[i - 1])) - goto end; - } + for(i = 1; i < TARGET_NSIG_WORDS; i++) { + __put_user(set->sig[i], &frame->extramask[i - 1]); + } setup_return(regs, ka, &frame->retcode, frame_addr, usig, frame_addr + offsetof(struct sigframe_v1, retcode)); -end: unlock_user_struct(frame, frame_addr, 1); } @@ -1810,8 +1780,7 @@ static void setup_rt_frame_v1(int usig, struct target_sigaction *ka, setup_sigcontext(&frame->uc.tuc_mcontext, env, set->sig[0]); for(i = 0; i < TARGET_NSIG_WORDS; i++) { - if (__put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i])) - goto end; + __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); } setup_return(env, ka, &frame->retcode, frame_addr, usig, @@ -1820,7 +1789,6 @@ static void 
setup_rt_frame_v1(int usig, struct target_sigaction *ka, env->regs[1] = info_addr; env->regs[2] = uc_addr; -end: unlock_user_struct(frame, frame_addr, 1); } @@ -1867,24 +1835,24 @@ restore_sigcontext(CPUARMState *env, struct target_sigcontext *sc) int err = 0; uint32_t cpsr; - __get_user_error(env->regs[0], &sc->arm_r0, err); - __get_user_error(env->regs[1], &sc->arm_r1, err); - __get_user_error(env->regs[2], &sc->arm_r2, err); - __get_user_error(env->regs[3], &sc->arm_r3, err); - __get_user_error(env->regs[4], &sc->arm_r4, err); - __get_user_error(env->regs[5], &sc->arm_r5, err); - __get_user_error(env->regs[6], &sc->arm_r6, err); - __get_user_error(env->regs[7], &sc->arm_r7, err); - __get_user_error(env->regs[8], &sc->arm_r8, err); - __get_user_error(env->regs[9], &sc->arm_r9, err); - __get_user_error(env->regs[10], &sc->arm_r10, err); - __get_user_error(env->regs[11], &sc->arm_fp, err); - __get_user_error(env->regs[12], &sc->arm_ip, err); - __get_user_error(env->regs[13], &sc->arm_sp, err); - __get_user_error(env->regs[14], &sc->arm_lr, err); - __get_user_error(env->regs[15], &sc->arm_pc, err); + __get_user(env->regs[0], &sc->arm_r0); + __get_user(env->regs[1], &sc->arm_r1); + __get_user(env->regs[2], &sc->arm_r2); + __get_user(env->regs[3], &sc->arm_r3); + __get_user(env->regs[4], &sc->arm_r4); + __get_user(env->regs[5], &sc->arm_r5); + __get_user(env->regs[6], &sc->arm_r6); + __get_user(env->regs[7], &sc->arm_r7); + __get_user(env->regs[8], &sc->arm_r8); + __get_user(env->regs[9], &sc->arm_r9); + __get_user(env->regs[10], &sc->arm_r10); + __get_user(env->regs[11], &sc->arm_fp); + __get_user(env->regs[12], &sc->arm_ip); + __get_user(env->regs[13], &sc->arm_sp); + __get_user(env->regs[14], &sc->arm_lr); + __get_user(env->regs[15], &sc->arm_pc); #ifdef TARGET_CONFIG_CPU_32 - __get_user_error(cpsr, &sc->arm_cpsr, err); + __get_user(cpsr, &sc->arm_cpsr); cpsr_write(env, cpsr, CPSR_USER | CPSR_EXEC); #endif @@ -1914,12 +1882,10 @@ static long 
do_sigreturn_v1(CPUARMState *env) if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) goto badframe; - if (__get_user(set.sig[0], &frame->sc.oldmask)) - goto badframe; - for(i = 1; i < TARGET_NSIG_WORDS; i++) { - if (__get_user(set.sig[i], &frame->extramask[i - 1])) - goto badframe; - } + __get_user(set.sig[0], &frame->sc.oldmask); + for(i = 1; i < TARGET_NSIG_WORDS; i++) { + __get_user(set.sig[i], &frame->extramask[i - 1]); + } target_to_host_sigset_internal(&host_set, &set); do_sigprocmask(SIG_SETMASK, &host_set, NULL); @@ -1936,7 +1902,6 @@ static long do_sigreturn_v1(CPUARMState *env) return env->regs[0]; badframe: - unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV /* , current */); return 0; } @@ -2276,17 +2241,17 @@ setup___siginfo(__siginfo_t *si, CPUSPARCState *env, abi_ulong mask) { int err = 0, i; - err |= __put_user(env->psr, &si->si_regs.psr); - err |= __put_user(env->pc, &si->si_regs.pc); - err |= __put_user(env->npc, &si->si_regs.npc); - err |= __put_user(env->y, &si->si_regs.y); + __put_user(env->psr, &si->si_regs.psr); + __put_user(env->pc, &si->si_regs.pc); + __put_user(env->npc, &si->si_regs.npc); + __put_user(env->y, &si->si_regs.y); for (i=0; i < 8; i++) { - err |= __put_user(env->gregs[i], &si->si_regs.u_regs[i]); + __put_user(env->gregs[i], &si->si_regs.u_regs[i]); } for (i=0; i < 8; i++) { - err |= __put_user(env->regwptr[UREG_I0 + i], &si->si_regs.u_regs[i+8]); + __put_user(env->regwptr[UREG_I0 + i], &si->si_regs.u_regs[i+8]); } - err |= __put_user(mask, &si->si_mask); + __put_user(mask, &si->si_mask); return err; } @@ -2297,13 +2262,13 @@ setup_sigcontext(struct target_sigcontext *sc, /*struct _fpstate *fpstate,*/ { int err = 0; - err |= __put_user(mask, &sc->sigc_mask); - err |= __put_user(env->regwptr[UREG_SP], &sc->sigc_sp); - err |= __put_user(env->pc, &sc->sigc_pc); - err |= __put_user(env->npc, &sc->sigc_npc); - err |= __put_user(env->psr, &sc->sigc_psr); - err |= __put_user(env->gregs[1], &sc->sigc_g1); - err 
|= __put_user(env->regwptr[UREG_O0], &sc->sigc_o0); + __put_user(mask, &sc->sigc_mask); + __put_user(env->regwptr[UREG_SP], &sc->sigc_sp); + __put_user(env->pc, &sc->sigc_pc); + __put_user(env->npc, &sc->sigc_npc); + __put_user(env->psr, &sc->sigc_psr); + __put_user(env->gregs[1], &sc->sigc_g1); + __put_user(env->regwptr[UREG_O0], &sc->sigc_o0); return err; } @@ -2335,21 +2300,21 @@ static void setup_frame(int sig, struct target_sigaction *ka, #endif /* 2. Save the current process state */ err = setup___siginfo(&sf->info, env, set->sig[0]); - err |= __put_user(0, &sf->extra_size); + __put_user(0, &sf->extra_size); - //err |= save_fpu_state(regs, &sf->fpu_state); - //err |= __put_user(&sf->fpu_state, &sf->fpu_save); + //save_fpu_state(regs, &sf->fpu_state); + //__put_user(&sf->fpu_state, &sf->fpu_save); - err |= __put_user(set->sig[0], &sf->info.si_mask); + __put_user(set->sig[0], &sf->info.si_mask); for (i = 0; i < TARGET_NSIG_WORDS - 1; i++) { - err |= __put_user(set->sig[i + 1], &sf->extramask[i]); + __put_user(set->sig[i + 1], &sf->extramask[i]); } for (i = 0; i < 8; i++) { - err |= __put_user(env->regwptr[i + UREG_L0], &sf->ss.locals[i]); + __put_user(env->regwptr[i + UREG_L0], &sf->ss.locals[i]); } for (i = 0; i < 8; i++) { - err |= __put_user(env->regwptr[i + UREG_I0], &sf->ss.ins[i]); + __put_user(env->regwptr[i + UREG_I0], &sf->ss.ins[i]); } if (err) goto sigsegv; @@ -2376,11 +2341,11 @@ static void setup_frame(int sig, struct target_sigaction *ka, /* mov __NR_sigreturn, %g1 */ val32 = 0x821020d8; - err |= __put_user(val32, &sf->insns[0]); + __put_user(val32, &sf->insns[0]); /* t 0x10 */ val32 = 0x91d02010; - err |= __put_user(val32, &sf->insns[1]); + __put_user(val32, &sf->insns[1]); if (err) goto sigsegv; @@ -2399,43 +2364,6 @@ sigsegv: unlock_user(sf, sf_addr, sizeof(struct target_signal_frame)); force_sig(TARGET_SIGSEGV); } -static inline int -restore_fpu_state(CPUSPARCState *env, qemu_siginfo_fpu_t *fpu) -{ - int err; -#if 0 -#ifdef CONFIG_SMP - if 
(current->flags & PF_USEDFPU) - regs->psr &= ~PSR_EF; -#else - if (current == last_task_used_math) { - last_task_used_math = 0; - regs->psr &= ~PSR_EF; - } -#endif - current->used_math = 1; - current->flags &= ~PF_USEDFPU; -#endif -#if 0 - if (verify_area (VERIFY_READ, fpu, sizeof(*fpu))) - return -EFAULT; -#endif - - /* XXX: incorrect */ - err = copy_from_user(&env->fpr[0], fpu->si_float_regs[0], - (sizeof(abi_ulong) * 32)); - err |= __get_user(env->fsr, &fpu->si_fsr); -#if 0 - err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth); - if (current->thread.fpqdepth != 0) - err |= __copy_from_user(&current->thread.fpqueue[0], - &fpu->si_fpqueue[0], - ((sizeof(unsigned long) + - (sizeof(unsigned long *)))*16)); -#endif - return err; -} - static void setup_rt_frame(int sig, struct target_sigaction *ka, target_siginfo_t *info, @@ -2451,7 +2379,7 @@ long do_sigreturn(CPUSPARCState *env) uint32_t up_psr, pc, npc; target_sigset_t set; sigset_t host_set; - int err, i; + int err=0, i; sf_addr = env->regwptr[UREG_FP]; if (!lock_user_struct(VERIFY_READ, sf, sf_addr, 1)) @@ -2467,14 +2395,14 @@ long do_sigreturn(CPUSPARCState *env) if (sf_addr & 3) goto segv_and_exit; - err = __get_user(pc, &sf->info.si_regs.pc); - err |= __get_user(npc, &sf->info.si_regs.npc); + __get_user(pc, &sf->info.si_regs.pc); + __get_user(npc, &sf->info.si_regs.npc); if ((pc | npc) & 3) goto segv_and_exit; /* 2. Restore the state */ - err |= __get_user(up_psr, &sf->info.si_regs.psr); + __get_user(up_psr, &sf->info.si_regs.psr); /* User can only change condition codes and FPU enabling in %psr.
*/ env->psr = (up_psr & (PSR_ICC /* | PSR_EF */)) @@ -2482,12 +2410,12 @@ long do_sigreturn(CPUSPARCState *env) env->pc = pc; env->npc = npc; - err |= __get_user(env->y, &sf->info.si_regs.y); + __get_user(env->y, &sf->info.si_regs.y); for (i=0; i < 8; i++) { - err |= __get_user(env->gregs[i], &sf->info.si_regs.u_regs[i]); + __get_user(env->gregs[i], &sf->info.si_regs.u_regs[i]); } for (i=0; i < 8; i++) { - err |= __get_user(env->regwptr[i + UREG_I0], &sf->info.si_regs.u_regs[i+8]); + __get_user(env->regwptr[i + UREG_I0], &sf->info.si_regs.u_regs[i+8]); } /* FIXME: implement FPU save/restore: @@ -2499,9 +2427,9 @@ long do_sigreturn(CPUSPARCState *env) /* This is pretty much atomic, no amount locking would prevent * the races which exist anyways. */ - err |= __get_user(set.sig[0], &sf->info.si_mask); + __get_user(set.sig[0], &sf->info.si_mask); for(i = 1; i < TARGET_NSIG_WORDS; i++) { - err |= (__get_user(set.sig[i], &sf->extramask[i - 1])); + __get_user(set.sig[i], &sf->extramask[i - 1]); } target_to_host_sigset_internal(&host_set, &set); @@ -2599,62 +2527,58 @@ void sparc64_set_context(CPUSPARCState *env) target_mc_gregset_t *grp; abi_ulong pc, npc, tstate; abi_ulong fp, i7, w_addr; - int err; unsigned int i; ucp_addr = env->regwptr[UREG_I0]; if (!lock_user_struct(VERIFY_READ, ucp, ucp_addr, 1)) goto do_sigsegv; grp = &ucp->tuc_mcontext.mc_gregs; - err = __get_user(pc, &((*grp)[MC_PC])); - err |= __get_user(npc, &((*grp)[MC_NPC])); - if (err || ((pc | npc) & 3)) + __get_user(pc, &((*grp)[MC_PC])); + __get_user(npc, &((*grp)[MC_NPC])); + if ((pc | npc) & 3) goto do_sigsegv; if (env->regwptr[UREG_I1]) { target_sigset_t target_set; sigset_t set; if (TARGET_NSIG_WORDS == 1) { - if (__get_user(target_set.sig[0], &ucp->tuc_sigmask.sig[0])) - goto do_sigsegv; + __get_user(target_set.sig[0], &ucp->tuc_sigmask.sig[0]); } else { abi_ulong *src, *dst; src = ucp->tuc_sigmask.sig; dst = target_set.sig; for (i = 0; i < TARGET_NSIG_WORDS; i++, dst++, src++) { - err |= 
__get_user(*dst, src); + __get_user(*dst, src); } - if (err) - goto do_sigsegv; } target_to_host_sigset_internal(&set, &target_set); do_sigprocmask(SIG_SETMASK, &set, NULL); } env->pc = pc; env->npc = npc; - err |= __get_user(env->y, &((*grp)[MC_Y])); - err |= __get_user(tstate, &((*grp)[MC_TSTATE])); + __get_user(env->y, &((*grp)[MC_Y])); + __get_user(tstate, &((*grp)[MC_TSTATE])); env->asi = (tstate >> 24) & 0xff; cpu_put_ccr(env, tstate >> 32); cpu_put_cwp64(env, tstate & 0x1f); - err |= __get_user(env->gregs[1], (&(*grp)[MC_G1])); - err |= __get_user(env->gregs[2], (&(*grp)[MC_G2])); - err |= __get_user(env->gregs[3], (&(*grp)[MC_G3])); - err |= __get_user(env->gregs[4], (&(*grp)[MC_G4])); - err |= __get_user(env->gregs[5], (&(*grp)[MC_G5])); - err |= __get_user(env->gregs[6], (&(*grp)[MC_G6])); - err |= __get_user(env->gregs[7], (&(*grp)[MC_G7])); - err |= __get_user(env->regwptr[UREG_I0], (&(*grp)[MC_O0])); - err |= __get_user(env->regwptr[UREG_I1], (&(*grp)[MC_O1])); - err |= __get_user(env->regwptr[UREG_I2], (&(*grp)[MC_O2])); - err |= __get_user(env->regwptr[UREG_I3], (&(*grp)[MC_O3])); - err |= __get_user(env->regwptr[UREG_I4], (&(*grp)[MC_O4])); - err |= __get_user(env->regwptr[UREG_I5], (&(*grp)[MC_O5])); - err |= __get_user(env->regwptr[UREG_I6], (&(*grp)[MC_O6])); - err |= __get_user(env->regwptr[UREG_I7], (&(*grp)[MC_O7])); - - err |= __get_user(fp, &(ucp->tuc_mcontext.mc_fp)); - err |= __get_user(i7, &(ucp->tuc_mcontext.mc_i7)); + __get_user(env->gregs[1], (&(*grp)[MC_G1])); + __get_user(env->gregs[2], (&(*grp)[MC_G2])); + __get_user(env->gregs[3], (&(*grp)[MC_G3])); + __get_user(env->gregs[4], (&(*grp)[MC_G4])); + __get_user(env->gregs[5], (&(*grp)[MC_G5])); + __get_user(env->gregs[6], (&(*grp)[MC_G6])); + __get_user(env->gregs[7], (&(*grp)[MC_G7])); + __get_user(env->regwptr[UREG_I0], (&(*grp)[MC_O0])); + __get_user(env->regwptr[UREG_I1], (&(*grp)[MC_O1])); + __get_user(env->regwptr[UREG_I2], (&(*grp)[MC_O2])); + __get_user(env->regwptr[UREG_I3], 
(&(*grp)[MC_O3])); + __get_user(env->regwptr[UREG_I4], (&(*grp)[MC_O4])); + __get_user(env->regwptr[UREG_I5], (&(*grp)[MC_O5])); + __get_user(env->regwptr[UREG_I6], (&(*grp)[MC_O6])); + __get_user(env->regwptr[UREG_I7], (&(*grp)[MC_O7])); + + __get_user(fp, &(ucp->tuc_mcontext.mc_fp)); + __get_user(i7, &(ucp->tuc_mcontext.mc_i7)); w_addr = TARGET_STACK_BIAS+env->regwptr[UREG_I6]; if (put_user(fp, w_addr + offsetof(struct target_reg_window, ins[6]), @@ -2668,23 +2592,21 @@ void sparc64_set_context(CPUSPARCState *env) * is only restored if fenab is non-zero in: * __get_user(fenab, &(ucp->tuc_mcontext.mc_fpregs.mcfpu_enab)); */ - err |= __get_user(env->fprs, &(ucp->tuc_mcontext.mc_fpregs.mcfpu_fprs)); + __get_user(env->fprs, &(ucp->tuc_mcontext.mc_fpregs.mcfpu_fprs)); { uint32_t *src = ucp->tuc_mcontext.mc_fpregs.mcfpu_fregs.sregs; for (i = 0; i < 64; i++, src++) { if (i & 1) { - err |= __get_user(env->fpr[i/2].l.lower, src); + __get_user(env->fpr[i/2].l.lower, src); } else { - err |= __get_user(env->fpr[i/2].l.upper, src); + __get_user(env->fpr[i/2].l.upper, src); } } } - err |= __get_user(env->fsr, - &(ucp->tuc_mcontext.mc_fpregs.mcfpu_fsr)); - err |= __get_user(env->gsr, - &(ucp->tuc_mcontext.mc_fpregs.mcfpu_gsr)); - if (err) - goto do_sigsegv; + __get_user(env->fsr, + &(ucp->tuc_mcontext.mc_fpregs.mcfpu_fsr)); + __get_user(env->gsr, + &(ucp->tuc_mcontext.mc_fpregs.mcfpu_gsr)); unlock_user_struct(ucp, ucp_addr, 0); return; do_sigsegv: @@ -2720,39 +2642,39 @@ void sparc64_get_context(CPUSPARCState *env) do_sigprocmask(0, NULL, &set); host_to_target_sigset_internal(&target_set, &set); if (TARGET_NSIG_WORDS == 1) { - err |= __put_user(target_set.sig[0], - (abi_ulong *)&ucp->tuc_sigmask); + __put_user(target_set.sig[0], + (abi_ulong *)&ucp->tuc_sigmask); } else { abi_ulong *src, *dst; src = target_set.sig; dst = ucp->tuc_sigmask.sig; for (i = 0; i < TARGET_NSIG_WORDS; i++, dst++, src++) { - err |= __put_user(*src, dst); + __put_user(*src, dst); } if (err) goto 
do_sigsegv; } /* XXX: tstate must be saved properly */ - // err |= __put_user(env->tstate, &((*grp)[MC_TSTATE])); - err |= __put_user(env->pc, &((*grp)[MC_PC])); - err |= __put_user(env->npc, &((*grp)[MC_NPC])); - err |= __put_user(env->y, &((*grp)[MC_Y])); - err |= __put_user(env->gregs[1], &((*grp)[MC_G1])); - err |= __put_user(env->gregs[2], &((*grp)[MC_G2])); - err |= __put_user(env->gregs[3], &((*grp)[MC_G3])); - err |= __put_user(env->gregs[4], &((*grp)[MC_G4])); - err |= __put_user(env->gregs[5], &((*grp)[MC_G5])); - err |= __put_user(env->gregs[6], &((*grp)[MC_G6])); - err |= __put_user(env->gregs[7], &((*grp)[MC_G7])); - err |= __put_user(env->regwptr[UREG_I0], &((*grp)[MC_O0])); - err |= __put_user(env->regwptr[UREG_I1], &((*grp)[MC_O1])); - err |= __put_user(env->regwptr[UREG_I2], &((*grp)[MC_O2])); - err |= __put_user(env->regwptr[UREG_I3], &((*grp)[MC_O3])); - err |= __put_user(env->regwptr[UREG_I4], &((*grp)[MC_O4])); - err |= __put_user(env->regwptr[UREG_I5], &((*grp)[MC_O5])); - err |= __put_user(env->regwptr[UREG_I6], &((*grp)[MC_O6])); - err |= __put_user(env->regwptr[UREG_I7], &((*grp)[MC_O7])); + // __put_user(env->tstate, &((*grp)[MC_TSTATE])); + __put_user(env->pc, &((*grp)[MC_PC])); + __put_user(env->npc, &((*grp)[MC_NPC])); + __put_user(env->y, &((*grp)[MC_Y])); + __put_user(env->gregs[1], &((*grp)[MC_G1])); + __put_user(env->gregs[2], &((*grp)[MC_G2])); + __put_user(env->gregs[3], &((*grp)[MC_G3])); + __put_user(env->gregs[4], &((*grp)[MC_G4])); + __put_user(env->gregs[5], &((*grp)[MC_G5])); + __put_user(env->gregs[6], &((*grp)[MC_G6])); + __put_user(env->gregs[7], &((*grp)[MC_G7])); + __put_user(env->regwptr[UREG_I0], &((*grp)[MC_O0])); + __put_user(env->regwptr[UREG_I1], &((*grp)[MC_O1])); + __put_user(env->regwptr[UREG_I2], &((*grp)[MC_O2])); + __put_user(env->regwptr[UREG_I3], &((*grp)[MC_O3])); + __put_user(env->regwptr[UREG_I4], &((*grp)[MC_O4])); + __put_user(env->regwptr[UREG_I5], &((*grp)[MC_O5])); + 
__put_user(env->regwptr[UREG_I6], &((*grp)[MC_O6])); + __put_user(env->regwptr[UREG_I7], &((*grp)[MC_O7])); w_addr = TARGET_STACK_BIAS+env->regwptr[UREG_I6]; fp = i7 = 0; @@ -2762,22 +2684,22 @@ void sparc64_get_context(CPUSPARCState *env) if (get_user(i7, w_addr + offsetof(struct target_reg_window, ins[7]), abi_ulong) != 0) goto do_sigsegv; - err |= __put_user(fp, &(mcp->mc_fp)); - err |= __put_user(i7, &(mcp->mc_i7)); + __put_user(fp, &(mcp->mc_fp)); + __put_user(i7, &(mcp->mc_i7)); { uint32_t *dst = ucp->tuc_mcontext.mc_fpregs.mcfpu_fregs.sregs; for (i = 0; i < 64; i++, dst++) { if (i & 1) { - err |= __put_user(env->fpr[i/2].l.lower, dst); + __put_user(env->fpr[i/2].l.lower, dst); } else { - err |= __put_user(env->fpr[i/2].l.upper, dst); + __put_user(env->fpr[i/2].l.upper, dst); } } } - err |= __put_user(env->fsr, &(mcp->mc_fpregs.mcfpu_fsr)); - err |= __put_user(env->gsr, &(mcp->mc_fpregs.mcfpu_gsr)); - err |= __put_user(env->fprs, &(mcp->mc_fpregs.mcfpu_fprs)); + __put_user(env->fsr, &(mcp->mc_fpregs.mcfpu_fsr)); + __put_user(env->gsr, &(mcp->mc_fpregs.mcfpu_gsr)); + __put_user(env->fprs, &(mcp->mc_fpregs.mcfpu_fprs)); if (err) goto do_sigsegv; @@ -2867,82 +2789,76 @@ static inline int install_sigtramp(unsigned int *tramp, unsigned int syscall) * syscall */ - err |= __put_user(0x24020000 + syscall, tramp + 0); - err |= __put_user(0x0000000c , tramp + 1); + __put_user(0x24020000 + syscall, tramp + 0); + __put_user(0x0000000c , tramp + 1); return err; } -static inline int -setup_sigcontext(CPUMIPSState *regs, struct target_sigcontext *sc) +static inline void setup_sigcontext(CPUMIPSState *regs, + struct target_sigcontext *sc) { - int err = 0; int i; - err |= __put_user(exception_resume_pc(regs), &sc->sc_pc); + __put_user(exception_resume_pc(regs), &sc->sc_pc); regs->hflags &= ~MIPS_HFLAG_BMASK; __put_user(0, &sc->sc_regs[0]); for (i = 1; i < 32; ++i) { - err |= __put_user(regs->active_tc.gpr[i], &sc->sc_regs[i]); + __put_user(regs->active_tc.gpr[i], 
&sc->sc_regs[i]); } - err |= __put_user(regs->active_tc.HI[0], &sc->sc_mdhi); - err |= __put_user(regs->active_tc.LO[0], &sc->sc_mdlo); + __put_user(regs->active_tc.HI[0], &sc->sc_mdhi); + __put_user(regs->active_tc.LO[0], &sc->sc_mdlo); /* Rather than checking for dsp existence, always copy. The storage would just be garbage otherwise. */ - err |= __put_user(regs->active_tc.HI[1], &sc->sc_hi1); - err |= __put_user(regs->active_tc.HI[2], &sc->sc_hi2); - err |= __put_user(regs->active_tc.HI[3], &sc->sc_hi3); - err |= __put_user(regs->active_tc.LO[1], &sc->sc_lo1); - err |= __put_user(regs->active_tc.LO[2], &sc->sc_lo2); - err |= __put_user(regs->active_tc.LO[3], &sc->sc_lo3); + __put_user(regs->active_tc.HI[1], &sc->sc_hi1); + __put_user(regs->active_tc.HI[2], &sc->sc_hi2); + __put_user(regs->active_tc.HI[3], &sc->sc_hi3); + __put_user(regs->active_tc.LO[1], &sc->sc_lo1); + __put_user(regs->active_tc.LO[2], &sc->sc_lo2); + __put_user(regs->active_tc.LO[3], &sc->sc_lo3); { uint32_t dsp = cpu_rddsp(0x3ff, regs); - err |= __put_user(dsp, &sc->sc_dsp); + __put_user(dsp, &sc->sc_dsp); } - err |= __put_user(1, &sc->sc_used_math); + __put_user(1, &sc->sc_used_math); for (i = 0; i < 32; ++i) { - err |= __put_user(regs->active_fpu.fpr[i].d, &sc->sc_fpregs[i]); + __put_user(regs->active_fpu.fpr[i].d, &sc->sc_fpregs[i]); } - - return err; } -static inline int +static inline void restore_sigcontext(CPUMIPSState *regs, struct target_sigcontext *sc) { - int err = 0; int i; - err |= __get_user(regs->CP0_EPC, &sc->sc_pc); + __get_user(regs->CP0_EPC, &sc->sc_pc); - err |= __get_user(regs->active_tc.HI[0], &sc->sc_mdhi); - err |= __get_user(regs->active_tc.LO[0], &sc->sc_mdlo); + __get_user(regs->active_tc.HI[0], &sc->sc_mdhi); + __get_user(regs->active_tc.LO[0], &sc->sc_mdlo); for (i = 1; i < 32; ++i) { - err |= __get_user(regs->active_tc.gpr[i], &sc->sc_regs[i]); + __get_user(regs->active_tc.gpr[i], &sc->sc_regs[i]); } - err |= __get_user(regs->active_tc.HI[1], &sc->sc_hi1); - err 
|= __get_user(regs->active_tc.HI[2], &sc->sc_hi2); - err |= __get_user(regs->active_tc.HI[3], &sc->sc_hi3); - err |= __get_user(regs->active_tc.LO[1], &sc->sc_lo1); - err |= __get_user(regs->active_tc.LO[2], &sc->sc_lo2); - err |= __get_user(regs->active_tc.LO[3], &sc->sc_lo3); + __get_user(regs->active_tc.HI[1], &sc->sc_hi1); + __get_user(regs->active_tc.HI[2], &sc->sc_hi2); + __get_user(regs->active_tc.HI[3], &sc->sc_hi3); + __get_user(regs->active_tc.LO[1], &sc->sc_lo1); + __get_user(regs->active_tc.LO[2], &sc->sc_lo2); + __get_user(regs->active_tc.LO[3], &sc->sc_lo3); { uint32_t dsp; - err |= __get_user(dsp, &sc->sc_dsp); + __get_user(dsp, &sc->sc_dsp); cpu_wrdsp(dsp, 0x3ff, regs); } for (i = 0; i < 32; ++i) { - err |= __get_user(regs->active_fpu.fpr[i].d, &sc->sc_fpregs[i]); + __get_user(regs->active_fpu.fpr[i].d, &sc->sc_fpregs[i]); } - - return err; } /* @@ -2995,12 +2911,10 @@ static void setup_frame(int sig, struct target_sigaction * ka, install_sigtramp(frame->sf_code, TARGET_NR_sigreturn); - if(setup_sigcontext(regs, &frame->sf_sc)) - goto give_sigsegv; + setup_sigcontext(regs, &frame->sf_sc); for(i = 0; i < TARGET_NSIG_WORDS; i++) { - if(__put_user(set->sig[i], &frame->sf_mask.sig[i])) - goto give_sigsegv; + __put_user(set->sig[i], &frame->sf_mask.sig[i]); } /* @@ -3027,7 +2941,6 @@ static void setup_frame(int sig, struct target_sigaction * ka, return; give_sigsegv: - unlock_user_struct(frame, frame_addr, 1); force_sig(TARGET_SIGSEGV/*, current*/); } @@ -3047,15 +2960,13 @@ long do_sigreturn(CPUMIPSState *regs) goto badframe; for(i = 0; i < TARGET_NSIG_WORDS; i++) { - if(__get_user(target_set.sig[i], &frame->sf_mask.sig[i])) - goto badframe; + __get_user(target_set.sig[i], &frame->sf_mask.sig[i]); } target_to_host_sigset_internal(&blocked, &target_set); do_sigprocmask(SIG_SETMASK, &blocked, NULL); - if (restore_sigcontext(regs, &frame->sf_sc)) - goto badframe; + restore_sigcontext(regs, &frame->sf_sc); #if 0 /* @@ -3158,8 +3069,7 @@ long 
do_rt_sigreturn(CPUMIPSState *env) target_to_host_sigset(&blocked, &frame->rs_uc.tuc_sigmask); do_sigprocmask(SIG_SETMASK, &blocked, NULL); - if (restore_sigcontext(env, &frame->rs_uc.tuc_mcontext)) - goto badframe; + restore_sigcontext(env, &frame->rs_uc.tuc_mcontext); if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe, rs_uc.tuc_stack), @@ -3243,13 +3153,12 @@ static abi_ulong get_sigframe(struct target_sigaction *ka, return (sp - frame_size) & -8ul; } -static int setup_sigcontext(struct target_sigcontext *sc, +static void setup_sigcontext(struct target_sigcontext *sc, CPUSH4State *regs, unsigned long mask) { - int err = 0; int i; -#define COPY(x) err |= __put_user(regs->x, &sc->sc_##x) +#define COPY(x) __put_user(regs->x, &sc->sc_##x) COPY(gregs[0]); COPY(gregs[1]); COPY(gregs[2]); COPY(gregs[3]); COPY(gregs[4]); COPY(gregs[5]); @@ -3264,24 +3173,21 @@ static int setup_sigcontext(struct target_sigcontext *sc, #undef COPY for (i=0; i<16; i++) { - err |= __put_user(regs->fregs[i], &sc->sc_fpregs[i]); + __put_user(regs->fregs[i], &sc->sc_fpregs[i]); } - err |= __put_user(regs->fpscr, &sc->sc_fpscr); - err |= __put_user(regs->fpul, &sc->sc_fpul); + __put_user(regs->fpscr, &sc->sc_fpscr); + __put_user(regs->fpul, &sc->sc_fpul); /* non-iBCS2 extensions.. 
*/ - err |= __put_user(mask, &sc->oldmask); - - return err; + __put_user(mask, &sc->oldmask); } -static int restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc, +static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc, target_ulong *r0_p) { - unsigned int err = 0; int i; -#define COPY(x) err |= __get_user(regs->x, &sc->sc_##x) +#define COPY(x) __get_user(regs->x, &sc->sc_##x) COPY(gregs[1]); COPY(gregs[2]); COPY(gregs[3]); COPY(gregs[4]); COPY(gregs[5]); @@ -3296,14 +3202,13 @@ static int restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc, #undef COPY for (i=0; i<16; i++) { - err |= __get_user(regs->fregs[i], &sc->sc_fpregs[i]); + __get_user(regs->fregs[i], &sc->sc_fpregs[i]); } - err |= __get_user(regs->fpscr, &sc->sc_fpscr); - err |= __get_user(regs->fpul, &sc->sc_fpul); + __get_user(regs->fpscr, &sc->sc_fpscr); + __get_user(regs->fpul, &sc->sc_fpul); regs->tra = -1; /* disable syscall checks */ - err |= __get_user(*r0_p, &sc->sc_gregs[0]); - return err; + __get_user(*r0_p, &sc->sc_gregs[0]); } static void setup_frame(int sig, struct target_sigaction *ka, @@ -3321,10 +3226,10 @@ static void setup_frame(int sig, struct target_sigaction *ka, signal = current_exec_domain_sig(sig); - err |= setup_sigcontext(&frame->sc, regs, set->sig[0]); + setup_sigcontext(&frame->sc, regs, set->sig[0]); for (i = 0; i < TARGET_NSIG_WORDS - 1; i++) { - err |= __put_user(set->sig[i + 1], &frame->extramask[i]); + __put_user(set->sig[i + 1], &frame->extramask[i]); } /* Set up to return from userspace. 
If provided, use a stub @@ -3333,9 +3238,9 @@ static void setup_frame(int sig, struct target_sigaction *ka, regs->pr = (unsigned long) ka->sa_restorer; } else { /* Generate return code (system call to sigreturn) */ - err |= __put_user(MOVW(2), &frame->retcode[0]); - err |= __put_user(TRAP_NOARG, &frame->retcode[1]); - err |= __put_user((TARGET_NR_sigreturn), &frame->retcode[2]); + __put_user(MOVW(2), &frame->retcode[0]); + __put_user(TRAP_NOARG, &frame->retcode[1]); + __put_user((TARGET_NR_sigreturn), &frame->retcode[2]); regs->pr = (unsigned long) frame->retcode; } @@ -3373,21 +3278,21 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, signal = current_exec_domain_sig(sig); - err |= copy_siginfo_to_user(&frame->info, info); + copy_siginfo_to_user(&frame->info, info); /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.tuc_flags); - err |= __put_user(0, (unsigned long *)&frame->uc.tuc_link); - err |= __put_user((unsigned long)target_sigaltstack_used.ss_sp, - &frame->uc.tuc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->gregs[15]), - &frame->uc.tuc_stack.ss_flags); - err |= __put_user(target_sigaltstack_used.ss_size, - &frame->uc.tuc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.tuc_mcontext, + __put_user(0, &frame->uc.tuc_flags); + __put_user(0, (unsigned long *)&frame->uc.tuc_link); + __put_user((unsigned long)target_sigaltstack_used.ss_sp, + &frame->uc.tuc_stack.ss_sp); + __put_user(sas_ss_flags(regs->gregs[15]), + &frame->uc.tuc_stack.ss_flags); + __put_user(target_sigaltstack_used.ss_size, + &frame->uc.tuc_stack.ss_size); + setup_sigcontext(&frame->uc.tuc_mcontext, regs, set->sig[0]); for(i = 0; i < TARGET_NSIG_WORDS; i++) { - err |= __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); + __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); } /* Set up to return from userspace. 
If provided, use a stub @@ -3396,9 +3301,9 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, regs->pr = (unsigned long) ka->sa_restorer; } else { /* Generate return code (system call to sigreturn) */ - err |= __put_user(MOVW(2), &frame->retcode[0]); - err |= __put_user(TRAP_NOARG, &frame->retcode[1]); - err |= __put_user((TARGET_NR_rt_sigreturn), &frame->retcode[2]); + __put_user(MOVW(2), &frame->retcode[0]); + __put_user(TRAP_NOARG, &frame->retcode[1]); + __put_user((TARGET_NR_rt_sigreturn), &frame->retcode[2]); regs->pr = (unsigned long) frame->retcode; } @@ -3437,9 +3342,9 @@ long do_sigreturn(CPUSH4State *regs) if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) goto badframe; - err |= __get_user(target_set.sig[0], &frame->sc.oldmask); + __get_user(target_set.sig[0], &frame->sc.oldmask); for(i = 1; i < TARGET_NSIG_WORDS; i++) { - err |= (__get_user(target_set.sig[i], &frame->extramask[i - 1])); + __get_user(target_set.sig[i], &frame->extramask[i - 1]); } if (err) @@ -3448,8 +3353,7 @@ long do_sigreturn(CPUSH4State *regs) target_to_host_sigset_internal(&blocked, &target_set); do_sigprocmask(SIG_SETMASK, &blocked, NULL); - if (restore_sigcontext(regs, &frame->sc, &r0)) - goto badframe; + restore_sigcontext(regs, &frame->sc, &r0); unlock_user_struct(frame, frame_addr, 0); return r0; @@ -3477,8 +3381,7 @@ long do_rt_sigreturn(CPUSH4State *regs) target_to_host_sigset(&blocked, &frame->uc.tuc_sigmask); do_sigprocmask(SIG_SETMASK, &blocked, NULL); - if (restore_sigcontext(regs, &frame->uc.tuc_mcontext, &r0)) - goto badframe; + restore_sigcontext(regs, &frame->uc.tuc_mcontext, &r0); if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe, uc.tuc_stack), @@ -3617,7 +3520,6 @@ static void setup_frame(int sig, struct target_sigaction *ka, { struct target_signal_frame *frame; abi_ulong frame_addr; - int err = 0; int i; frame_addr = get_sigframe(ka, env, sizeof *frame); @@ -3625,13 +3527,10 @@ static void setup_frame(int sig, struct 
target_sigaction *ka, goto badframe; /* Save the mask. */ - err |= __put_user(set->sig[0], &frame->uc.tuc_mcontext.oldmask); - if (err) - goto badframe; + __put_user(set->sig[0], &frame->uc.tuc_mcontext.oldmask); for(i = 1; i < TARGET_NSIG_WORDS; i++) { - if (__put_user(set->sig[i], &frame->extramask[i - 1])) - goto badframe; + __put_user(set->sig[i], &frame->extramask[i - 1]); } setup_sigcontext(&frame->uc.tuc_mcontext, env); @@ -3646,19 +3545,16 @@ static void setup_frame(int sig, struct target_sigaction *ka, /* Note, these encodings are _big endian_! */ /* addi r12, r0, __NR_sigreturn */ t = 0x31800000UL | TARGET_NR_sigreturn; - err |= __put_user(t, frame->tramp + 0); + __put_user(t, frame->tramp + 0); /* brki r14, 0x8 */ t = 0xb9cc0008UL; - err |= __put_user(t, frame->tramp + 1); + __put_user(t, frame->tramp + 1); /* Return from sighandler will jump to the tramp. Negative 8 offset because return is rtsd r15, 8 */ env->regs[15] = ((unsigned long)frame->tramp) - 8; } - if (err) - goto badframe; - /* Set up registers for signal handler */ env->regs[1] = frame_addr; /* Signal handler args: */ @@ -3673,7 +3569,6 @@ static void setup_frame(int sig, struct target_sigaction *ka, unlock_user_struct(frame, frame_addr, 1); return; badframe: - unlock_user_struct(frame, frame_addr, 1); force_sig(TARGET_SIGSEGV); } @@ -3698,11 +3593,9 @@ long do_sigreturn(CPUMBState *env) goto badframe; /* Restore blocked signals */ - if (__get_user(target_set.sig[0], &frame->uc.tuc_mcontext.oldmask)) - goto badframe; + __get_user(target_set.sig[0], &frame->uc.tuc_mcontext.oldmask); for(i = 1; i < TARGET_NSIG_WORDS; i++) { - if (__get_user(target_set.sig[i], &frame->extramask[i - 1])) - goto badframe; + __get_user(target_set.sig[i], &frame->extramask[i - 1]); } target_to_host_sigset_internal(&set, &target_set); do_sigprocmask(SIG_SETMASK, &set, NULL); @@ -3715,7 +3608,6 @@ long do_sigreturn(CPUMBState *env) unlock_user_struct(frame, frame_addr, 0); return env->regs[10]; badframe: - 
unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); } @@ -3807,7 +3699,6 @@ static void setup_frame(int sig, struct target_sigaction *ka, { struct target_signal_frame *frame; abi_ulong frame_addr; - int err = 0; int i; frame_addr = get_sigframe(env, sizeof *frame); @@ -3821,20 +3712,17 @@ static void setup_frame(int sig, struct target_sigaction *ka, * * This is movu.w __NR_sigreturn, r9; break 13; */ - err |= __put_user(0x9c5f, frame->retcode+0); - err |= __put_user(TARGET_NR_sigreturn, - frame->retcode + 1); - err |= __put_user(0xe93d, frame->retcode + 2); + __put_user(0x9c5f, frame->retcode+0); + __put_user(TARGET_NR_sigreturn, + frame->retcode + 1); + __put_user(0xe93d, frame->retcode + 2); /* Save the mask. */ - err |= __put_user(set->sig[0], &frame->sc.oldmask); - if (err) - goto badframe; + __put_user(set->sig[0], &frame->sc.oldmask); - for(i = 1; i < TARGET_NSIG_WORDS; i++) { - if (__put_user(set->sig[i], &frame->extramask[i - 1])) - goto badframe; - } + for(i = 1; i < TARGET_NSIG_WORDS; i++) { + __put_user(set->sig[i], &frame->extramask[i - 1]); + } setup_sigcontext(&frame->sc, env); @@ -3848,7 +3736,6 @@ static void setup_frame(int sig, struct target_sigaction *ka, unlock_user_struct(frame, frame_addr, 1); return; badframe: - unlock_user_struct(frame, frame_addr, 1); force_sig(TARGET_SIGSEGV); } @@ -3873,11 +3760,9 @@ long do_sigreturn(CPUCRISState *env) goto badframe; /* Restore blocked signals */ - if (__get_user(target_set.sig[0], &frame->sc.oldmask)) - goto badframe; + __get_user(target_set.sig[0], &frame->sc.oldmask); for(i = 1; i < TARGET_NSIG_WORDS; i++) { - if (__get_user(target_set.sig[i], &frame->extramask[i - 1])) - goto badframe; + __get_user(target_set.sig[i], &frame->extramask[i - 1]); } target_to_host_sigset_internal(&set, &target_set); do_sigprocmask(SIG_SETMASK, &set, NULL); @@ -3886,7 +3771,6 @@ long do_sigreturn(CPUCRISState *env) unlock_user_struct(frame, frame_addr, 0); return env->regs[10]; badframe: - 
unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); } @@ -3950,7 +3834,7 @@ static int restore_sigcontext(CPUOpenRISCState *regs, * stuff after pushing it) */ - err |= __get_user(old_usp, &sc->usp); + __get_user(old_usp, &sc->usp); phx_signal("old_usp 0x%lx", old_usp); __PHX__ REALLY /* ??? */ @@ -3971,16 +3855,15 @@ badframe: /* Set up a signal frame. */ -static int setup_sigcontext(struct target_sigcontext *sc, +static void setup_sigcontext(struct target_sigcontext *sc, CPUOpenRISCState *regs, unsigned long mask) { - int err = 0; unsigned long usp = regs->gpr[1]; /* copy the regs. they are first in sc so we can use sc directly */ - /*err |= copy_to_user(&sc, regs, sizeof(struct target_pt_regs));*/ + /*copy_to_user(&sc, regs, sizeof(struct target_pt_regs));*/ /* Set the frametype to CRIS_FRAME_NORMAL for the execution of the signal handler. The frametype will be restored to its previous @@ -3988,8 +3871,8 @@ static int setup_sigcontext(struct target_sigcontext *sc, /*regs->frametype = CRIS_FRAME_NORMAL;*/ /* then some other stuff */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(usp, &sc->usp); return err; + __put_user(mask, &sc->oldmask); + __put_user(usp, &sc->usp); } static inline unsigned long align_sigframe(unsigned long sp) @@ -4048,40 +3931,33 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, } info_addr = frame_addr + offsetof(struct target_rt_sigframe, info); - err |= __put_user(info_addr, &frame->pinfo); + __put_user(info_addr, &frame->pinfo); uc_addr = frame_addr + offsetof(struct target_rt_sigframe, uc); - err |= __put_user(uc_addr, &frame->puc); + __put_user(uc_addr, &frame->puc); if (ka->sa_flags & SA_SIGINFO) { - err |= copy_siginfo_to_user(&frame->info, info); - } - if (err) { - goto give_sigsegv; + copy_siginfo_to_user(&frame->info, info); } /*err |= __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext));*/ - err |= __put_user(0, &frame->uc.tuc_flags); - err |= __put_user(0, 
&frame->uc.tuc_link); - err |= __put_user(target_sigaltstack_used.ss_sp, - &frame->uc.tuc_stack.ss_sp); - err |= __put_user(sas_ss_flags(env->gpr[1]), &frame->uc.tuc_stack.ss_flags); - err |= __put_user(target_sigaltstack_used.ss_size, - &frame->uc.tuc_stack.ss_size); - err |= setup_sigcontext(&frame->sc, env, set->sig[0]); + __put_user(0, &frame->uc.tuc_flags); + __put_user(0, &frame->uc.tuc_link); + __put_user(target_sigaltstack_used.ss_sp, + &frame->uc.tuc_stack.ss_sp); + __put_user(sas_ss_flags(env->gpr[1]), &frame->uc.tuc_stack.ss_flags); + __put_user(target_sigaltstack_used.ss_size, + &frame->uc.tuc_stack.ss_size); + setup_sigcontext(&frame->sc, env, set->sig[0]); /*err |= copy_to_user(frame->uc.tuc_sigmask, set, sizeof(*set));*/ - if (err) { - goto give_sigsegv; - } - /* trampoline - the desired return ip is the retcode itself */ return_ip = (unsigned long)&frame->retcode; /* This is l.ori r11,r0,__NR_sigreturn, l.sys 1 */ - err |= __put_user(0xa960, (short *)(frame->retcode + 0)); - err |= __put_user(TARGET_NR_rt_sigreturn, (short *)(frame->retcode + 2)); - err |= __put_user(0x20000001, (unsigned long *)(frame->retcode + 4)); - err |= __put_user(0x15000000, (unsigned long *)(frame->retcode + 8)); + __put_user(0xa960, (short *)(frame->retcode + 0)); + __put_user(TARGET_NR_rt_sigreturn, (short *)(frame->retcode + 2)); + __put_user(0x20000001, (unsigned long *)(frame->retcode + 4)); + __put_user(0x15000000, (unsigned long *)(frame->retcode + 8)); if (err) { goto give_sigsegv; @@ -4248,9 +4124,7 @@ static void setup_frame(int sig, struct target_sigaction *ka, } qemu_log("%s: 1\n", __FUNCTION__); - if (__put_user(set->sig[0], &frame->sc.oldmask[0])) { - goto give_sigsegv; - } + __put_user(set->sig[0], &frame->sc.oldmask[0]); save_sigregs(env, &frame->sregs); @@ -4265,15 +4139,12 @@ static void setup_frame(int sig, struct target_sigaction *ka, } else { env->regs[14] = (unsigned long) frame->retcode | PSW_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | 
TARGET_NR_sigreturn, - (uint16_t *)(frame->retcode))) - goto give_sigsegv; + __put_user(S390_SYSCALL_OPCODE | TARGET_NR_sigreturn, + (uint16_t *)(frame->retcode)); } /* Set up backchain. */ - if (__put_user(env->regs[15], (abi_ulong *) frame)) { - goto give_sigsegv; - } + __put_user(env->regs[15], (abi_ulong *) frame); /* Set up registers for signal handler */ env->regs[15] = frame_addr; @@ -4288,15 +4159,12 @@ static void setup_frame(int sig, struct target_sigaction *ka, env->regs[5] = 0; // FIXME: no clue... current->thread.prot_addr; /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(env->regs[2], (int *) &frame->signo)) { - goto give_sigsegv; - } + __put_user(env->regs[2], (int *) &frame->signo); unlock_user_struct(frame, frame_addr, 1); return; give_sigsegv: qemu_log("%s: give_sigsegv\n", __FUNCTION__); - unlock_user_struct(frame, frame_addr, 1); force_sig(TARGET_SIGSEGV); } @@ -4316,9 +4184,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, } qemu_log("%s: 1\n", __FUNCTION__); - if (copy_siginfo_to_user(&frame->info, info)) { - goto give_sigsegv; - } + copy_siginfo_to_user(&frame->info, info); /* Create the ucontext. */ __put_user(0, &frame->uc.tuc_flags); @@ -4339,16 +4205,12 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, env->regs[14] = (unsigned long) ka->sa_restorer | PSW_ADDR_AMODE; } else { env->regs[14] = (unsigned long) frame->retcode | PSW_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | TARGET_NR_rt_sigreturn, - (uint16_t *)(frame->retcode))) { - goto give_sigsegv; - } + __put_user(S390_SYSCALL_OPCODE | TARGET_NR_rt_sigreturn, + (uint16_t *)(frame->retcode)); } /* Set up backchain. 
*/ - if (__put_user(env->regs[15], (abi_ulong *) frame)) { - goto give_sigsegv; - } + __put_user(env->regs[15], (abi_ulong *) frame); /* Set up registers for signal handler */ env->regs[15] = frame_addr; @@ -4361,7 +4223,6 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, give_sigsegv: qemu_log("%s: give_sigsegv\n", __FUNCTION__); - unlock_user_struct(frame, frame_addr, 1); force_sig(TARGET_SIGSEGV); } @@ -4372,21 +4233,21 @@ restore_sigregs(CPUS390XState *env, target_sigregs *sc) int i; for (i = 0; i < 16; i++) { - err |= __get_user(env->regs[i], &sc->regs.gprs[i]); + __get_user(env->regs[i], &sc->regs.gprs[i]); } - err |= __get_user(env->psw.mask, &sc->regs.psw.mask); + __get_user(env->psw.mask, &sc->regs.psw.mask); qemu_log("%s: sc->regs.psw.addr 0x%llx env->psw.addr 0x%llx\n", __FUNCTION__, (unsigned long long)sc->regs.psw.addr, (unsigned long long)env->psw.addr); - err |= __get_user(env->psw.addr, &sc->regs.psw.addr); + __get_user(env->psw.addr, &sc->regs.psw.addr); /* FIXME: 31-bit -> | PSW_ADDR_AMODE */ for (i = 0; i < 16; i++) { - err |= __get_user(env->aregs[i], &sc->regs.acrs[i]); + __get_user(env->aregs[i], &sc->regs.acrs[i]); } for (i = 0; i < 16; i++) { - err |= __get_user(env->fregs[i].ll, &sc->fpregs.fprs[i]); + __get_user(env->fregs[i].ll, &sc->fpregs.fprs[i]); } return err; @@ -4404,9 +4265,7 @@ long do_sigreturn(CPUS390XState *env) if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) { goto badframe; } - if (__get_user(target_set.sig[0], &frame->sc.oldmask[0])) { - goto badframe; - } + __get_user(target_set.sig[0], &frame->sc.oldmask[0]); target_to_host_sigset_internal(&set, &target_set); do_sigprocmask(SIG_SETMASK, &set, NULL); /* ~_BLOCKABLE? 
*/ @@ -4419,7 +4278,6 @@ long do_sigreturn(CPUS390XState *env) return env->regs[2]; badframe: - unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); return 0; } @@ -4622,7 +4480,7 @@ static target_ulong get_sigframe(struct target_sigaction *ka, return newsp; } -static int save_user_regs(CPUPPCState *env, struct target_mcontext *frame, +static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame, int sigret) { target_ulong msr = env->msr; @@ -4635,21 +4493,17 @@ static int save_user_regs(CPUPPCState *env, struct target_mcontext *frame, /* Save general registers. */ for (i = 0; i < ARRAY_SIZE(env->gpr); i++) { - if (__put_user(env->gpr[i], &frame->mc_gregs[i])) { - return 1; - } + __put_user(env->gpr[i], &frame->mc_gregs[i]); } - if (__put_user(env->nip, &frame->mc_gregs[TARGET_PT_NIP]) - || __put_user(env->ctr, &frame->mc_gregs[TARGET_PT_CTR]) - || __put_user(env->lr, &frame->mc_gregs[TARGET_PT_LNK]) - || __put_user(env->xer, &frame->mc_gregs[TARGET_PT_XER])) - return 1; + __put_user(env->nip, &frame->mc_gregs[TARGET_PT_NIP]); + __put_user(env->ctr, &frame->mc_gregs[TARGET_PT_CTR]); + __put_user(env->lr, &frame->mc_gregs[TARGET_PT_LNK]); + __put_user(env->xer, &frame->mc_gregs[TARGET_PT_XER]); for (i = 0; i < ARRAY_SIZE(env->crf); i++) { ccr |= env->crf[i] << (32 - ((i + 1) * 4)); } - if (__put_user(ccr, &frame->mc_gregs[TARGET_PT_CCR])) - return 1; + __put_user(ccr, &frame->mc_gregs[TARGET_PT_CCR]); /* Save Altivec registers if necessary. */ if (env->insns_flags & PPC_ALTIVEC) { @@ -4657,69 +4511,53 @@ static int save_user_regs(CPUPPCState *env, struct target_mcontext *frame, ppc_avr_t *avr = &env->avr[i]; ppc_avr_t *vreg = &frame->mc_vregs.altivec[i]; - if (__put_user(avr->u64[0], &vreg->u64[0]) || - __put_user(avr->u64[1], &vreg->u64[1])) { - return 1; - } + __put_user(avr->u64[0], &vreg->u64[0]); + __put_user(avr->u64[1], &vreg->u64[1]); } /* Set MSR_VR in the saved MSR value to indicate that frame->mc_vregs contains valid data. 
*/ msr |= MSR_VR; - if (__put_user((uint32_t)env->spr[SPR_VRSAVE], - &frame->mc_vregs.altivec[32].u32[3])) - return 1; + __put_user((uint32_t)env->spr[SPR_VRSAVE], + &frame->mc_vregs.altivec[32].u32[3]); } /* Save floating point registers. */ if (env->insns_flags & PPC_FLOAT) { for (i = 0; i < ARRAY_SIZE(env->fpr); i++) { - if (__put_user(env->fpr[i], &frame->mc_fregs[i])) { - return 1; - } + __put_user(env->fpr[i], &frame->mc_fregs[i]); } - if (__put_user((uint64_t) env->fpscr, &frame->mc_fregs[32])) - return 1; + __put_user((uint64_t) env->fpscr, &frame->mc_fregs[32]); } /* Save SPE registers. The kernel only saves the high half. */ if (env->insns_flags & PPC_SPE) { #if defined(TARGET_PPC64) for (i = 0; i < ARRAY_SIZE(env->gpr); i++) { - if (__put_user(env->gpr[i] >> 32, &frame->mc_vregs.spe[i])) { - return 1; - } + __put_user(env->gpr[i] >> 32, &frame->mc_vregs.spe[i]); } #else for (i = 0; i < ARRAY_SIZE(env->gprh); i++) { - if (__put_user(env->gprh[i], &frame->mc_vregs.spe[i])) { - return 1; - } + __put_user(env->gprh[i], &frame->mc_vregs.spe[i]); } #endif /* Set MSR_SPE in the saved MSR value to indicate that frame->mc_vregs contains valid data. */ msr |= MSR_SPE; - if (__put_user(env->spe_fscr, &frame->mc_vregs.spe[32])) - return 1; + __put_user(env->spe_fscr, &frame->mc_vregs.spe[32]); } /* Store MSR. */ - if (__put_user(msr, &frame->mc_gregs[TARGET_PT_MSR])) - return 1; + __put_user(msr, &frame->mc_gregs[TARGET_PT_MSR]); /* Set up the sigreturn trampoline: li r0,sigret; sc. 
*/ if (sigret) { - if (__put_user(0x38000000UL | sigret, &frame->tramp[0]) || - __put_user(0x44000002UL, &frame->tramp[1])) { - return 1; - } + __put_user(0x38000000UL | sigret, &frame->tramp[0]); + __put_user(0x44000002UL, &frame->tramp[1]); } - - return 0; } -static int restore_user_regs(CPUPPCState *env, - struct target_mcontext *frame, int sig) +static void restore_user_regs(CPUPPCState *env, + struct target_mcontext *frame, int sig) { target_ulong save_r2 = 0; target_ulong msr; @@ -4733,17 +4571,13 @@ static int restore_user_regs(CPUPPCState *env, /* Restore general registers. */ for (i = 0; i < ARRAY_SIZE(env->gpr); i++) { - if (__get_user(env->gpr[i], &frame->mc_gregs[i])) { - return 1; - } + __get_user(env->gpr[i], &frame->mc_gregs[i]); } - if (__get_user(env->nip, &frame->mc_gregs[TARGET_PT_NIP]) - || __get_user(env->ctr, &frame->mc_gregs[TARGET_PT_CTR]) - || __get_user(env->lr, &frame->mc_gregs[TARGET_PT_LNK]) - || __get_user(env->xer, &frame->mc_gregs[TARGET_PT_XER])) - return 1; - if (__get_user(ccr, &frame->mc_gregs[TARGET_PT_CCR])) - return 1; + __get_user(env->nip, &frame->mc_gregs[TARGET_PT_NIP]); + __get_user(env->ctr, &frame->mc_gregs[TARGET_PT_CTR]); + __get_user(env->lr, &frame->mc_gregs[TARGET_PT_LNK]); + __get_user(env->xer, &frame->mc_gregs[TARGET_PT_XER]); + __get_user(ccr, &frame->mc_gregs[TARGET_PT_CCR]); for (i = 0; i < ARRAY_SIZE(env->crf); i++) { env->crf[i] = (ccr >> (32 - ((i + 1) * 4))) & 0xf; @@ -4753,8 +4587,7 @@ static int restore_user_regs(CPUPPCState *env, env->gpr[2] = save_r2; } /* Restore MSR. */ - if (__get_user(msr, &frame->mc_gregs[TARGET_PT_MSR])) - return 1; + __get_user(msr, &frame->mc_gregs[TARGET_PT_MSR]); /* If doing signal return, restore the previous little-endian mode. 
*/ if (sig) @@ -4766,28 +4599,22 @@ static int restore_user_regs(CPUPPCState *env, ppc_avr_t *avr = &env->avr[i]; ppc_avr_t *vreg = &frame->mc_vregs.altivec[i]; - if (__get_user(avr->u64[0], &vreg->u64[0]) || - __get_user(avr->u64[1], &vreg->u64[1])) { - return 1; - } + __get_user(avr->u64[0], &vreg->u64[0]); + __get_user(avr->u64[1], &vreg->u64[1]); } /* Set MSR_VEC in the saved MSR value to indicate that frame->mc_vregs contains valid data. */ - if (__get_user(env->spr[SPR_VRSAVE], - (target_ulong *)(&frame->mc_vregs.altivec[32].u32[3]))) - return 1; + __get_user(env->spr[SPR_VRSAVE], + (target_ulong *)(&frame->mc_vregs.altivec[32].u32[3])); } /* Restore floating point registers. */ if (env->insns_flags & PPC_FLOAT) { uint64_t fpscr; for (i = 0; i < ARRAY_SIZE(env->fpr); i++) { - if (__get_user(env->fpr[i], &frame->mc_fregs[i])) { - return 1; - } + __get_user(env->fpr[i], &frame->mc_fregs[i]); } - if (__get_user(fpscr, &frame->mc_fregs[32])) - return 1; + __get_user(fpscr, &frame->mc_fregs[32]); env->fpscr = (uint32_t) fpscr; } @@ -4797,23 +4624,16 @@ static int restore_user_regs(CPUPPCState *env, for (i = 0; i < ARRAY_SIZE(env->gpr); i++) { uint32_t hi; - if (__get_user(hi, &frame->mc_vregs.spe[i])) { - return 1; - } + __get_user(hi, &frame->mc_vregs.spe[i]); env->gpr[i] = ((uint64_t)hi << 32) | ((uint32_t) env->gpr[i]); } #else for (i = 0; i < ARRAY_SIZE(env->gprh); i++) { - if (__get_user(env->gprh[i], &frame->mc_vregs.spe[i])) { - return 1; - } + __get_user(env->gprh[i], &frame->mc_vregs.spe[i]); } #endif - if (__get_user(env->spe_fscr, &frame->mc_vregs.spe[32])) - return 1; + __get_user(env->spe_fscr, &frame->mc_vregs.spe[32]); } - - return 0; } static void setup_frame(int sig, struct target_sigaction *ka, @@ -4832,18 +4652,18 @@ static void setup_frame(int sig, struct target_sigaction *ka, signal = current_exec_domain_sig(sig); - err |= __put_user(ka->_sa_handler, &sc->handler); - err |= __put_user(set->sig[0], &sc->oldmask); + __put_user(ka->_sa_handler, 
&sc->handler); + __put_user(set->sig[0], &sc->oldmask); #if defined(TARGET_PPC64) - err |= __put_user(set->sig[0] >> 32, &sc->_unused[3]); + __put_user(set->sig[0] >> 32, &sc->_unused[3]); #else - err |= __put_user(set->sig[1], &sc->_unused[3]); + __put_user(set->sig[1], &sc->_unused[3]); #endif - err |= __put_user(h2g(&frame->mctx), &sc->regs); - err |= __put_user(sig, &sc->signal); + __put_user(h2g(&frame->mctx), &sc->regs); + __put_user(sig, &sc->signal); /* Save user regs. */ - err |= save_user_regs(env, &frame->mctx, TARGET_NR_sigreturn); + save_user_regs(env, &frame->mctx, TARGET_NR_sigreturn); /* The kernel checks for the presence of a VDSO here. We don't emulate a vdso, so use a sigreturn system call. */ @@ -4892,24 +4712,24 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, signal = current_exec_domain_sig(sig); - err |= copy_siginfo_to_user(&rt_sf->info, info); - - err |= __put_user(0, &rt_sf->uc.tuc_flags); - err |= __put_user(0, &rt_sf->uc.tuc_link); - err |= __put_user((target_ulong)target_sigaltstack_used.ss_sp, - &rt_sf->uc.tuc_stack.ss_sp); - err |= __put_user(sas_ss_flags(env->gpr[1]), - &rt_sf->uc.tuc_stack.ss_flags); - err |= __put_user(target_sigaltstack_used.ss_size, - &rt_sf->uc.tuc_stack.ss_size); - err |= __put_user(h2g (&rt_sf->uc.tuc_mcontext), - &rt_sf->uc.tuc_regs); + copy_siginfo_to_user(&rt_sf->info, info); + + __put_user(0, &rt_sf->uc.tuc_flags); + __put_user(0, &rt_sf->uc.tuc_link); + __put_user((target_ulong)target_sigaltstack_used.ss_sp, + &rt_sf->uc.tuc_stack.ss_sp); + __put_user(sas_ss_flags(env->gpr[1]), + &rt_sf->uc.tuc_stack.ss_flags); + __put_user(target_sigaltstack_used.ss_size, + &rt_sf->uc.tuc_stack.ss_size); + __put_user(h2g (&rt_sf->uc.tuc_mcontext), + &rt_sf->uc.tuc_regs); for(i = 0; i < TARGET_NSIG_WORDS; i++) { - err |= __put_user(set->sig[i], &rt_sf->uc.tuc_sigmask.sig[i]); + __put_user(set->sig[i], &rt_sf->uc.tuc_sigmask.sig[i]); } frame = &rt_sf->uc.tuc_mcontext; - err |= save_user_regs(env, frame, 
TARGET_NR_rt_sigreturn); + save_user_regs(env, frame, TARGET_NR_rt_sigreturn); /* The kernel checks for the presence of a VDSO here. We don't emulate a vdso, so use a sigreturn system call. */ @@ -4920,7 +4740,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, /* Create a stack frame for the caller of the handler. */ newsp = rt_sf_addr - (SIGNAL_FRAMESIZE + 16); - err |= __put_user(env->gpr[1], (target_ulong *)(uintptr_t) newsp); + __put_user(env->gpr[1], (target_ulong *)(uintptr_t) newsp); if (err) goto sigsegv; @@ -4960,19 +4780,16 @@ long do_sigreturn(CPUPPCState *env) #if defined(TARGET_PPC64) set.sig[0] = sc->oldmask + ((long)(sc->_unused[3]) << 32); #else - if(__get_user(set.sig[0], &sc->oldmask) || - __get_user(set.sig[1], &sc->_unused[3])) - goto sigsegv; + __get_user(set.sig[0], &sc->oldmask); + __get_user(set.sig[1], &sc->_unused[3]); #endif target_to_host_sigset_internal(&blocked, &set); do_sigprocmask(SIG_SETMASK, &blocked, NULL); - if (__get_user(sr_addr, &sc->regs)) - goto sigsegv; + __get_user(sr_addr, &sc->regs); if (!lock_user_struct(VERIFY_READ, sr, sr_addr, 1)) goto sigsegv; - if (restore_user_regs(env, sr, 1)) - goto sigsegv; + restore_user_regs(env, sr, 1); unlock_user_struct(sr, sr_addr, 1); unlock_user_struct(sc, sc_addr, 1); @@ -5002,23 +4819,17 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig) fprintf (stderr, "do_setcontext: not implemented\n"); return 0; #else - if (__get_user(mcp_addr, &ucp->tuc_regs)) - return 1; + __get_user(mcp_addr, &ucp->tuc_regs); if (!lock_user_struct(VERIFY_READ, mcp, mcp_addr, 1)) return 1; target_to_host_sigset_internal(&blocked, &set); do_sigprocmask(SIG_SETMASK, &blocked, NULL); - if (restore_user_regs(env, mcp, sig)) - goto sigsegv; + restore_user_regs(env, mcp, sig); unlock_user_struct(mcp, mcp_addr, 1); return 0; - -sigsegv: - unlock_user_struct(mcp, mcp_addr, 1); - return 1; #endif } @@ -5109,41 +4920,33 @@ struct target_rt_sigframe struct 
target_ucontext uc; }; -static int -setup_sigcontext(struct target_sigcontext *sc, CPUM68KState *env, - abi_ulong mask) +static void setup_sigcontext(struct target_sigcontext *sc, CPUM68KState *env, + abi_ulong mask) { - int err = 0; - - err |= __put_user(mask, &sc->sc_mask); - err |= __put_user(env->aregs[7], &sc->sc_usp); - err |= __put_user(env->dregs[0], &sc->sc_d0); - err |= __put_user(env->dregs[1], &sc->sc_d1); - err |= __put_user(env->aregs[0], &sc->sc_a0); - err |= __put_user(env->aregs[1], &sc->sc_a1); - err |= __put_user(env->sr, &sc->sc_sr); - err |= __put_user(env->pc, &sc->sc_pc); - - return err; + __put_user(mask, &sc->sc_mask); + __put_user(env->aregs[7], &sc->sc_usp); + __put_user(env->dregs[0], &sc->sc_d0); + __put_user(env->dregs[1], &sc->sc_d1); + __put_user(env->aregs[0], &sc->sc_a0); + __put_user(env->aregs[1], &sc->sc_a1); + __put_user(env->sr, &sc->sc_sr); + __put_user(env->pc, &sc->sc_pc); } -static int +static void restore_sigcontext(CPUM68KState *env, struct target_sigcontext *sc, int *pd0) { - int err = 0; int temp; - err |= __get_user(env->aregs[7], &sc->sc_usp); - err |= __get_user(env->dregs[1], &sc->sc_d1); - err |= __get_user(env->aregs[0], &sc->sc_a0); - err |= __get_user(env->aregs[1], &sc->sc_a1); - err |= __get_user(env->pc, &sc->sc_pc); - err |= __get_user(temp, &sc->sc_sr); + __get_user(env->aregs[7], &sc->sc_usp); + __get_user(env->dregs[1], &sc->sc_d1); + __get_user(env->aregs[0], &sc->sc_a0); + __get_user(env->aregs[1], &sc->sc_a1); + __get_user(env->pc, &sc->sc_pc); + __get_user(temp, &sc->sc_sr); env->sr = (env->sr & 0xff00) | (temp & 0xff); *pd0 = tswapl(sc->sc_d0); - - return err; } /* @@ -5172,40 +4975,33 @@ static void setup_frame(int sig, struct target_sigaction *ka, abi_ulong frame_addr; abi_ulong retcode_addr; abi_ulong sc_addr; - int err = 0; int i; frame_addr = get_sigframe(ka, env, sizeof *frame); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) goto give_sigsegv; - err |= __put_user(sig, &frame->sig); 
+ __put_user(sig, &frame->sig); sc_addr = frame_addr + offsetof(struct target_sigframe, sc); - err |= __put_user(sc_addr, &frame->psc); + __put_user(sc_addr, &frame->psc); - err |= setup_sigcontext(&frame->sc, env, set->sig[0]); - if (err) - goto give_sigsegv; + setup_sigcontext(&frame->sc, env, set->sig[0]); for(i = 1; i < TARGET_NSIG_WORDS; i++) { - if (__put_user(set->sig[i], &frame->extramask[i - 1])) - goto give_sigsegv; + __put_user(set->sig[i], &frame->extramask[i - 1]); } /* Set up to return from userspace. */ retcode_addr = frame_addr + offsetof(struct target_sigframe, retcode); - err |= __put_user(retcode_addr, &frame->pretcode); + __put_user(retcode_addr, &frame->pretcode); /* moveq #,d0; trap #0 */ - err |= __put_user(0x70004e40 + (TARGET_NR_sigreturn << 16), + __put_user(0x70004e40 + (TARGET_NR_sigreturn << 16), (long *)(frame->retcode)); - if (err) - goto give_sigsegv; - /* Set up to return from userspace */ env->aregs[7] = frame_addr; @@ -5215,7 +5011,6 @@ static void setup_frame(int sig, struct target_sigaction *ka, return; give_sigsegv: - unlock_user_struct(frame, frame_addr, 1); force_sig(TARGET_SIGSEGV); } @@ -5223,29 +5018,28 @@ static inline int target_rt_setup_ucontext(struct target_ucontext *uc, CPUM68KState *env) { target_greg_t *gregs = uc->tuc_mcontext.gregs; - int err; - err = __put_user(TARGET_MCONTEXT_VERSION, &uc->tuc_mcontext.version); - err |= __put_user(env->dregs[0], &gregs[0]); - err |= __put_user(env->dregs[1], &gregs[1]); - err |= __put_user(env->dregs[2], &gregs[2]); - err |= __put_user(env->dregs[3], &gregs[3]); - err |= __put_user(env->dregs[4], &gregs[4]); - err |= __put_user(env->dregs[5], &gregs[5]); - err |= __put_user(env->dregs[6], &gregs[6]); - err |= __put_user(env->dregs[7], &gregs[7]); - err |= __put_user(env->aregs[0], &gregs[8]); - err |= __put_user(env->aregs[1], &gregs[9]); - err |= __put_user(env->aregs[2], &gregs[10]); - err |= __put_user(env->aregs[3], &gregs[11]); - err |= __put_user(env->aregs[4], 
&gregs[12]); - err |= __put_user(env->aregs[5], &gregs[13]); - err |= __put_user(env->aregs[6], &gregs[14]); - err |= __put_user(env->aregs[7], &gregs[15]); - err |= __put_user(env->pc, &gregs[16]); - err |= __put_user(env->sr, &gregs[17]); + __put_user(TARGET_MCONTEXT_VERSION, &uc->tuc_mcontext.version); + __put_user(env->dregs[0], &gregs[0]); + __put_user(env->dregs[1], &gregs[1]); + __put_user(env->dregs[2], &gregs[2]); + __put_user(env->dregs[3], &gregs[3]); + __put_user(env->dregs[4], &gregs[4]); + __put_user(env->dregs[5], &gregs[5]); + __put_user(env->dregs[6], &gregs[6]); + __put_user(env->dregs[7], &gregs[7]); + __put_user(env->aregs[0], &gregs[8]); + __put_user(env->aregs[1], &gregs[9]); + __put_user(env->aregs[2], &gregs[10]); + __put_user(env->aregs[3], &gregs[11]); + __put_user(env->aregs[4], &gregs[12]); + __put_user(env->aregs[5], &gregs[13]); + __put_user(env->aregs[6], &gregs[14]); + __put_user(env->aregs[7], &gregs[15]); + __put_user(env->pc, &gregs[16]); + __put_user(env->sr, &gregs[17]); - return err; + return 0; } static inline int target_rt_restore_ucontext(CPUM68KState *env, @@ -5253,36 +5047,35 @@ static inline int target_rt_restore_ucontext(CPUM68KState *env, int *pd0) { int temp; - int err; target_greg_t *gregs = uc->tuc_mcontext.gregs; - err = __get_user(temp, &uc->tuc_mcontext.version); + __get_user(temp, &uc->tuc_mcontext.version); if (temp != TARGET_MCONTEXT_VERSION) goto badframe; /* restore passed registers */ - err |= __get_user(env->dregs[0], &gregs[0]); - err |= __get_user(env->dregs[1], &gregs[1]); - err |= __get_user(env->dregs[2], &gregs[2]); - err |= __get_user(env->dregs[3], &gregs[3]); - err |= __get_user(env->dregs[4], &gregs[4]); - err |= __get_user(env->dregs[5], &gregs[5]); - err |= __get_user(env->dregs[6], &gregs[6]); - err |= __get_user(env->dregs[7], &gregs[7]); - err |= __get_user(env->aregs[0], &gregs[8]); - err |= __get_user(env->aregs[1], &gregs[9]); - err |= __get_user(env->aregs[2], &gregs[10]); - err |= 
__get_user(env->aregs[3], &gregs[11]); - err |= __get_user(env->aregs[4], &gregs[12]); - err |= __get_user(env->aregs[5], &gregs[13]); - err |= __get_user(env->aregs[6], &gregs[14]); - err |= __get_user(env->aregs[7], &gregs[15]); - err |= __get_user(env->pc, &gregs[16]); - err |= __get_user(temp, &gregs[17]); + __get_user(env->dregs[0], &gregs[0]); + __get_user(env->dregs[1], &gregs[1]); + __get_user(env->dregs[2], &gregs[2]); + __get_user(env->dregs[3], &gregs[3]); + __get_user(env->dregs[4], &gregs[4]); + __get_user(env->dregs[5], &gregs[5]); + __get_user(env->dregs[6], &gregs[6]); + __get_user(env->dregs[7], &gregs[7]); + __get_user(env->aregs[0], &gregs[8]); + __get_user(env->aregs[1], &gregs[9]); + __get_user(env->aregs[2], &gregs[10]); + __get_user(env->aregs[3], &gregs[11]); + __get_user(env->aregs[4], &gregs[12]); + __get_user(env->aregs[5], &gregs[13]); + __get_user(env->aregs[6], &gregs[14]); + __get_user(env->aregs[7], &gregs[15]); + __get_user(env->pc, &gregs[16]); + __get_user(temp, &gregs[17]); env->sr = (env->sr & 0xff00) | (temp & 0xff); *pd0 = env->dregs[0]; - return err; + return 0; badframe: return 1; @@ -5304,46 +5097,45 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) goto give_sigsegv; - err |= __put_user(sig, &frame->sig); + __put_user(sig, &frame->sig); info_addr = frame_addr + offsetof(struct target_rt_sigframe, info); - err |= __put_user(info_addr, &frame->pinfo); + __put_user(info_addr, &frame->pinfo); uc_addr = frame_addr + offsetof(struct target_rt_sigframe, uc); - err |= __put_user(uc_addr, &frame->puc); + __put_user(uc_addr, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); + copy_siginfo_to_user(&frame->info, info); /* Create the ucontext */ - err |= __put_user(0, &frame->uc.tuc_flags); - err |= __put_user(0, &frame->uc.tuc_link); - err |= __put_user(target_sigaltstack_used.ss_sp, - &frame->uc.tuc_stack.ss_sp); - err |= 
__put_user(sas_ss_flags(env->aregs[7]), - &frame->uc.tuc_stack.ss_flags); - err |= __put_user(target_sigaltstack_used.ss_size, - &frame->uc.tuc_stack.ss_size); + __put_user(0, &frame->uc.tuc_flags); + __put_user(0, &frame->uc.tuc_link); + __put_user(target_sigaltstack_used.ss_sp, + &frame->uc.tuc_stack.ss_sp); + __put_user(sas_ss_flags(env->aregs[7]), + &frame->uc.tuc_stack.ss_flags); + __put_user(target_sigaltstack_used.ss_size, + &frame->uc.tuc_stack.ss_size); err |= target_rt_setup_ucontext(&frame->uc, env); if (err) goto give_sigsegv; for(i = 0; i < TARGET_NSIG_WORDS; i++) { - if (__put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i])) - goto give_sigsegv; + __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); } /* Set up to return from userspace. */ retcode_addr = frame_addr + offsetof(struct target_sigframe, retcode); - err |= __put_user(retcode_addr, &frame->pretcode); + __put_user(retcode_addr, &frame->pretcode); /* moveq #,d0; notb d0; trap #0 */ - err |= __put_user(0x70004600 + ((TARGET_NR_rt_sigreturn ^ 0xff) << 16), - (long *)(frame->retcode + 0)); - err |= __put_user(0x4e40, (short *)(frame->retcode + 4)); + __put_user(0x70004600 + ((TARGET_NR_rt_sigreturn ^ 0xff) << 16), + (long *)(frame->retcode + 0)); + __put_user(0x4e40, (short *)(frame->retcode + 4)); if (err) goto give_sigsegv; @@ -5374,12 +5166,10 @@ long do_sigreturn(CPUM68KState *env) /* set blocked signals */ - if (__get_user(target_set.sig[0], &frame->sc.sc_mask)) - goto badframe; + __get_user(target_set.sig[0], &frame->sc.sc_mask); for(i = 1; i < TARGET_NSIG_WORDS; i++) { - if (__get_user(target_set.sig[i], &frame->extramask[i - 1])) - goto badframe; + __get_user(target_set.sig[i], &frame->extramask[i - 1]); } target_to_host_sigset_internal(&set, &target_set); @@ -5387,14 +5177,12 @@ long do_sigreturn(CPUM68KState *env) /* restore registers */ - if (restore_sigcontext(env, &frame->sc, &d0)) - goto badframe; + restore_sigcontext(env, &frame->sc, &d0); unlock_user_struct(frame, frame_addr, 
0); return d0; badframe: - unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); return 0; } @@ -5480,53 +5268,49 @@ struct target_rt_sigframe { #define INSN_LDI_R0 0x201f0000 #define INSN_CALLSYS 0x00000083 -static int setup_sigcontext(struct target_sigcontext *sc, CPUAlphaState *env, +static void setup_sigcontext(struct target_sigcontext *sc, CPUAlphaState *env, abi_ulong frame_addr, target_sigset_t *set) { - int i, err = 0; + int i; - err |= __put_user(on_sig_stack(frame_addr), &sc->sc_onstack); - err |= __put_user(set->sig[0], &sc->sc_mask); - err |= __put_user(env->pc, &sc->sc_pc); - err |= __put_user(8, &sc->sc_ps); + __put_user(on_sig_stack(frame_addr), &sc->sc_onstack); + __put_user(set->sig[0], &sc->sc_mask); + __put_user(env->pc, &sc->sc_pc); + __put_user(8, &sc->sc_ps); for (i = 0; i < 31; ++i) { - err |= __put_user(env->ir[i], &sc->sc_regs[i]); + __put_user(env->ir[i], &sc->sc_regs[i]); } - err |= __put_user(0, &sc->sc_regs[31]); + __put_user(0, &sc->sc_regs[31]); for (i = 0; i < 31; ++i) { - err |= __put_user(env->fir[i], &sc->sc_fpregs[i]); + __put_user(env->fir[i], &sc->sc_fpregs[i]); } - err |= __put_user(0, &sc->sc_fpregs[31]); - err |= __put_user(cpu_alpha_load_fpcr(env), &sc->sc_fpcr); - - err |= __put_user(0, &sc->sc_traparg_a0); /* FIXME */ - err |= __put_user(0, &sc->sc_traparg_a1); /* FIXME */ - err |= __put_user(0, &sc->sc_traparg_a2); /* FIXME */ + __put_user(0, &sc->sc_fpregs[31]); + __put_user(cpu_alpha_load_fpcr(env), &sc->sc_fpcr); - return err; + __put_user(0, &sc->sc_traparg_a0); /* FIXME */ + __put_user(0, &sc->sc_traparg_a1); /* FIXME */ + __put_user(0, &sc->sc_traparg_a2); /* FIXME */ } -static int restore_sigcontext(CPUAlphaState *env, +static void restore_sigcontext(CPUAlphaState *env, struct target_sigcontext *sc) { uint64_t fpcr; - int i, err = 0; + int i; - err |= __get_user(env->pc, &sc->sc_pc); + __get_user(env->pc, &sc->sc_pc); for (i = 0; i < 31; ++i) { - err |= __get_user(env->ir[i], &sc->sc_regs[i]); + 
__get_user(env->ir[i], &sc->sc_regs[i]); } for (i = 0; i < 31; ++i) { - err |= __get_user(env->fir[i], &sc->sc_fpregs[i]); + __get_user(env->fir[i], &sc->sc_fpregs[i]); } - err |= __get_user(fpcr, &sc->sc_fpcr); + __get_user(fpcr, &sc->sc_fpcr); cpu_alpha_store_fpcr(env, fpcr); - - return err; } static inline abi_ulong get_sigframe(struct target_sigaction *sa, @@ -5554,15 +5338,15 @@ static void setup_frame(int sig, struct target_sigaction *ka, goto give_sigsegv; } - err |= setup_sigcontext(&frame->sc, env, frame_addr, set); + setup_sigcontext(&frame->sc, env, frame_addr, set); if (ka->sa_restorer) { r26 = ka->sa_restorer; } else { - err |= __put_user(INSN_MOV_R30_R16, &frame->retcode[0]); - err |= __put_user(INSN_LDI_R0 + TARGET_NR_sigreturn, - &frame->retcode[1]); - err |= __put_user(INSN_CALLSYS, &frame->retcode[2]); + __put_user(INSN_MOV_R30_R16, &frame->retcode[0]); + __put_user(INSN_LDI_R0 + TARGET_NR_sigreturn, + &frame->retcode[1]); + __put_user(INSN_CALLSYS, &frame->retcode[2]); /* imb() */ r26 = frame_addr; } @@ -5598,29 +5382,29 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, goto give_sigsegv; } - err |= copy_siginfo_to_user(&frame->info, info); + copy_siginfo_to_user(&frame->info, info); - err |= __put_user(0, &frame->uc.tuc_flags); - err |= __put_user(0, &frame->uc.tuc_link); - err |= __put_user(set->sig[0], &frame->uc.tuc_osf_sigmask); - err |= __put_user(target_sigaltstack_used.ss_sp, - &frame->uc.tuc_stack.ss_sp); - err |= __put_user(sas_ss_flags(env->ir[IR_SP]), - &frame->uc.tuc_stack.ss_flags); - err |= __put_user(target_sigaltstack_used.ss_size, - &frame->uc.tuc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.tuc_mcontext, env, frame_addr, set); + __put_user(0, &frame->uc.tuc_flags); + __put_user(0, &frame->uc.tuc_link); + __put_user(set->sig[0], &frame->uc.tuc_osf_sigmask); + __put_user(target_sigaltstack_used.ss_sp, + &frame->uc.tuc_stack.ss_sp); + __put_user(sas_ss_flags(env->ir[IR_SP]), + 
&frame->uc.tuc_stack.ss_flags); + __put_user(target_sigaltstack_used.ss_size, + &frame->uc.tuc_stack.ss_size); + setup_sigcontext(&frame->uc.tuc_mcontext, env, frame_addr, set); for (i = 0; i < TARGET_NSIG_WORDS; ++i) { - err |= __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); + __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); } if (ka->sa_restorer) { r26 = ka->sa_restorer; } else { - err |= __put_user(INSN_MOV_R30_R16, &frame->retcode[0]); - err |= __put_user(INSN_LDI_R0 + TARGET_NR_rt_sigreturn, - &frame->retcode[1]); - err |= __put_user(INSN_CALLSYS, &frame->retcode[2]); + __put_user(INSN_MOV_R30_R16, &frame->retcode[0]); + __put_user(INSN_LDI_R0 + TARGET_NR_rt_sigreturn, + &frame->retcode[1]); + __put_user(INSN_CALLSYS, &frame->retcode[2]); /* imb(); */ r26 = frame_addr; } @@ -5653,21 +5437,16 @@ long do_sigreturn(CPUAlphaState *env) } target_sigemptyset(&target_set); - if (__get_user(target_set.sig[0], &sc->sc_mask)) { - goto badframe; - } + __get_user(target_set.sig[0], &sc->sc_mask); target_to_host_sigset_internal(&set, &target_set); do_sigprocmask(SIG_SETMASK, &set, NULL); - if (restore_sigcontext(env, sc)) { - goto badframe; - } + restore_sigcontext(env, sc); unlock_user_struct(sc, sc_addr, 0); return env->ir[IR_V0]; badframe: - unlock_user_struct(sc, sc_addr, 0); force_sig(TARGET_SIGSEGV); } @@ -5683,9 +5462,7 @@ long do_rt_sigreturn(CPUAlphaState *env) target_to_host_sigset(&set, &frame->uc.tuc_sigmask); do_sigprocmask(SIG_SETMASK, &set, NULL); - if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) { - goto badframe; - } + restore_sigcontext(env, &frame->uc.tuc_mcontext); if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe, uc.tuc_stack), 0, env->ir[IR_SP]) == -EFAULT) { diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 6efeeff2bf..7d7407920b 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -1856,7 +1856,7 @@ static abi_long do_socket(int domain, int type, int protocol) } if (domain == PF_NETLINK) 
- return -EAFNOSUPPORT; /* do not NETLINK socket connections possible */ + return -TARGET_EAFNOSUPPORT; ret = get_errno(socket(domain, type, protocol)); if (ret >= 0) { ret = sock_flags_fixup(ret, target_type); @@ -7438,6 +7438,22 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = get_errno(sys_sched_getaffinity(arg1, mask_size, mask)); if (!is_error(ret)) { + if (ret > arg2) { + /* More data returned than the caller's buffer will fit. + * This only happens if sizeof(abi_long) < sizeof(long) + * and the caller passed us a buffer holding an odd number + * of abi_longs. If the host kernel is actually using the + * extra 4 bytes then fail EINVAL; otherwise we can just + * ignore them and only copy the interesting part. + */ + int numcpus = sysconf(_SC_NPROCESSORS_CONF); + if (numcpus > arg2 * 8) { + ret = -TARGET_EINVAL; + break; + } + ret = arg2; + } + if (copy_to_user(arg3, mask, ret)) { goto efault; } @@ -8686,7 +8702,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #ifdef TARGET_NR_set_thread_area case TARGET_NR_set_thread_area: #if defined(TARGET_MIPS) - ((CPUMIPSState *) cpu_env)->tls_value = arg1; + ((CPUMIPSState *) cpu_env)->active_tc.CP0_UserLocal = arg1; ret = 0; break; #elif defined(TARGET_CRIS) diff --git a/linux-user/uname.c b/linux-user/uname.c index f5d4c66f59..1e6560d724 100644 --- a/linux-user/uname.c +++ b/linux-user/uname.c @@ -52,9 +52,7 @@ const char *cpu_to_uname_machine(void *cpu_env) /* earliest emulated CPU is ARMv5TE; qemu can emulate the 1026, but not its * Jazelle support */ return "armv5te" utsname_suffix; -#elif defined(TARGET_X86_64) - return "x86-64"; -#elif defined(TARGET_I386) +#elif defined(TARGET_I386) && !defined(TARGET_X86_64) /* see arch/x86/kernel/cpu/bugs.c: check_bugs(), 386, 486, 586, 686 */ CPUState *cpu = ENV_GET_CPU((CPUX86State *)cpu_env); int family = object_property_get_int(OBJECT(cpu), "family", NULL); @@ -23,11 +23,13 @@ #include "exec/memory-internal.h" #include "exec/ram_addr.h" 
+#include "sysemu/sysemu.h" //#define DEBUG_UNASSIGNED static unsigned memory_region_transaction_depth; static bool memory_region_update_pending; +static bool ioeventfd_update_pending; static bool global_dirty_log = false; /* flat_view_mutex is taken around reading as->current_map; the critical @@ -484,15 +486,15 @@ static AddressSpace *memory_region_to_address_space(MemoryRegion *mr) { AddressSpace *as; - while (mr->parent) { - mr = mr->parent; + while (mr->container) { + mr = mr->container; } QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { if (mr == as->root) { return as; } } - abort(); + return NULL; } /* Render a memory region into the global view. Ranges in @view obscure @@ -786,22 +788,34 @@ void memory_region_transaction_begin(void) ++memory_region_transaction_depth; } +static void memory_region_clear_pending(void) +{ + memory_region_update_pending = false; + ioeventfd_update_pending = false; +} + void memory_region_transaction_commit(void) { AddressSpace *as; assert(memory_region_transaction_depth); --memory_region_transaction_depth; - if (!memory_region_transaction_depth && memory_region_update_pending) { - memory_region_update_pending = false; - MEMORY_LISTENER_CALL_GLOBAL(begin, Forward); + if (!memory_region_transaction_depth) { + if (memory_region_update_pending) { + MEMORY_LISTENER_CALL_GLOBAL(begin, Forward); - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - address_space_update_topology(as); - } + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + address_space_update_topology(as); + } - MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); - } + MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); + } else if (ioeventfd_update_pending) { + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + address_space_update_ioeventfds(as); + } + } + memory_region_clear_pending(); + } } static void memory_region_destructor_none(MemoryRegion *mr) @@ -837,7 +851,7 @@ void memory_region_init(MemoryRegion *mr, mr->opaque = NULL; mr->owner 
= owner; mr->iommu_ops = NULL; - mr->parent = NULL; + mr->container = NULL; mr->size = int128_make64(size); if (size == UINT64_MAX) { mr->size = int128_2_64(); @@ -1019,6 +1033,23 @@ void memory_region_init_ram(MemoryRegion *mr, mr->ram_addr = qemu_ram_alloc(size, mr); } +#ifdef __linux__ +void memory_region_init_ram_from_file(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + bool share, + const char *path, + Error **errp) +{ + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + mr->ram_addr = qemu_ram_alloc_from_file(size, mr, share, path, errp); +} +#endif + void memory_region_init_ram_ptr(MemoryRegion *mr, Object *owner, const char *name, @@ -1241,6 +1272,17 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr, cpu_physical_memory_reset_dirty(mr->ram_addr + addr, size, client); } +int memory_region_get_fd(MemoryRegion *mr) +{ + if (mr->alias) { + return memory_region_get_fd(mr->alias); + } + + assert(mr->terminates); + + return qemu_get_ram_fd(mr->ram_addr & TARGET_PAGE_MASK); +} + void *memory_region_get_ram_ptr(MemoryRegion *mr) { if (mr->alias) { @@ -1319,6 +1361,7 @@ void memory_region_add_coalescing(MemoryRegion *mr, void memory_region_clear_coalescing(MemoryRegion *mr) { CoalescedMemoryRange *cmr; + bool updated = false; qemu_flush_coalesced_mmio_buffer(); mr->flush_coalesced_mmio = false; @@ -1327,8 +1370,12 @@ void memory_region_clear_coalescing(MemoryRegion *mr) cmr = QTAILQ_FIRST(&mr->coalesced); QTAILQ_REMOVE(&mr->coalesced, cmr, link); g_free(cmr); + updated = true; + } + + if (updated) { + memory_region_update_coalesced_range(mr); } - memory_region_update_coalesced_range(mr); } void memory_region_set_flush_coalesced(MemoryRegion *mr) @@ -1373,7 +1420,7 @@ void memory_region_add_eventfd(MemoryRegion *mr, memmove(&mr->ioeventfds[i+1], &mr->ioeventfds[i], sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i)); mr->ioeventfds[i] = 
mrfd; - memory_region_update_pending |= mr->enabled; + ioeventfd_update_pending |= mr->enabled; memory_region_transaction_commit(); } @@ -1406,22 +1453,19 @@ void memory_region_del_eventfd(MemoryRegion *mr, --mr->ioeventfd_nb; mr->ioeventfds = g_realloc(mr->ioeventfds, sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1); - memory_region_update_pending |= mr->enabled; + ioeventfd_update_pending |= mr->enabled; memory_region_transaction_commit(); } -static void memory_region_add_subregion_common(MemoryRegion *mr, - hwaddr offset, - MemoryRegion *subregion) +static void memory_region_update_container_subregions(MemoryRegion *subregion) { + hwaddr offset = subregion->addr; + MemoryRegion *mr = subregion->container; MemoryRegion *other; memory_region_transaction_begin(); - assert(!subregion->parent); memory_region_ref(subregion); - subregion->parent = mr; - subregion->addr = offset; QTAILQ_FOREACH(other, &mr->subregions, subregions_link) { if (subregion->may_overlap || other->may_overlap) { continue; @@ -1455,6 +1499,15 @@ done: memory_region_transaction_commit(); } +static void memory_region_add_subregion_common(MemoryRegion *mr, + hwaddr offset, + MemoryRegion *subregion) +{ + assert(!subregion->container); + subregion->container = mr; + subregion->addr = offset; + memory_region_update_container_subregions(subregion); +} void memory_region_add_subregion(MemoryRegion *mr, hwaddr offset, @@ -1479,8 +1532,8 @@ void memory_region_del_subregion(MemoryRegion *mr, MemoryRegion *subregion) { memory_region_transaction_begin(); - assert(subregion->parent == mr); - subregion->parent = NULL; + assert(subregion->container == mr); + subregion->container = NULL; QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link); memory_region_unref(subregion); memory_region_update_pending |= mr->enabled && subregion->enabled; @@ -1498,27 +1551,27 @@ void memory_region_set_enabled(MemoryRegion *mr, bool enabled) memory_region_transaction_commit(); } -void memory_region_set_address(MemoryRegion 
*mr, hwaddr addr) +static void memory_region_readd_subregion(MemoryRegion *mr) { - MemoryRegion *parent = mr->parent; - int priority = mr->priority; - bool may_overlap = mr->may_overlap; + MemoryRegion *container = mr->container; - if (addr == mr->addr || !parent) { - mr->addr = addr; - return; + if (container) { + memory_region_transaction_begin(); + memory_region_ref(mr); + memory_region_del_subregion(container, mr); + mr->container = container; + memory_region_update_container_subregions(mr); + memory_region_unref(mr); + memory_region_transaction_commit(); } +} - memory_region_transaction_begin(); - memory_region_ref(mr); - memory_region_del_subregion(parent, mr); - if (may_overlap) { - memory_region_add_subregion_overlap(parent, addr, mr, priority); - } else { - memory_region_add_subregion(parent, addr, mr); +void memory_region_set_address(MemoryRegion *mr, hwaddr addr) +{ + if (addr != mr->addr) { + mr->addr = addr; + memory_region_readd_subregion(mr); } - memory_region_unref(mr); - memory_region_transaction_commit(); } void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset) @@ -1559,16 +1612,21 @@ static FlatRange *flatview_lookup(FlatView *view, AddrRange addr) sizeof(FlatRange), cmp_flatrange_addr); } -bool memory_region_present(MemoryRegion *parent, hwaddr addr) +bool memory_region_present(MemoryRegion *container, hwaddr addr) { - MemoryRegion *mr = memory_region_find(parent, addr, 1).mr; - if (!mr || (mr == parent)) { + MemoryRegion *mr = memory_region_find(container, addr, 1).mr; + if (!mr || (mr == container)) { return false; } memory_region_unref(mr); return true; } +bool memory_region_is_mapped(MemoryRegion *mr) +{ + return mr->container ? 
true : false; +} + MemoryRegionSection memory_region_find(MemoryRegion *mr, hwaddr addr, uint64_t size) { @@ -1580,12 +1638,15 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr, FlatRange *fr; addr += mr->addr; - for (root = mr; root->parent; ) { - root = root->parent; + for (root = mr; root->container; ) { + root = root->container; addr += root->addr; } as = memory_region_to_address_space(root); + if (!as) { + return ret; + } range = addrrange_make(int128_make64(addr), int128_make64(size)); view = address_space_get_flatview(as); @@ -487,6 +487,7 @@ static const char *monitor_event_names[] = { [QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED", [QEVENT_QUORUM_FAILURE] = "QUORUM_FAILURE", [QEVENT_QUORUM_REPORT_BAD] = "QUORUM_REPORT_BAD", + [QEVENT_ACPI_OST] = "ACPI_DEVICE_OST", }; QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX) @@ -2011,7 +2012,7 @@ static void do_info_numa(Monitor *mon, const QDict *qdict) } monitor_printf(mon, "\n"); monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i, - node_mem[i] >> 20); + numa_info[i].node_mem >> 20); } } @@ -2964,6 +2965,13 @@ static mon_cmd_t info_cmds[] = { .mhandler.cmd = hmp_info_tpm, }, { + .name = "memdev", + .args_type = "", + .params = "", + .help = "show the memory device", + .mhandler.cmd = hmp_info_memdev, + }, + { .name = NULL, }, }; diff --git a/net/Makefile.objs b/net/Makefile.objs index c25fe6920c..301f6b6b51 100644 --- a/net/Makefile.objs +++ b/net/Makefile.objs @@ -2,7 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o common-obj-y += socket.o common-obj-y += dump.o common-obj-y += eth.o -common-obj-$(CONFIG_POSIX) += tap.o +common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o common-obj-$(CONFIG_LINUX) += tap-linux.o common-obj-$(CONFIG_WIN32) += tap-win32.o common-obj-$(CONFIG_BSD) += tap-bsd.o diff --git a/net/clients.h b/net/clients.h index 7322ff5f33..7f3d4ae9f3 100644 --- a/net/clients.h +++ b/net/clients.h @@ -57,4 +57,7 @@ int net_init_netmap(const 
NetClientOptions *opts, const char *name, NetClientState *peer); #endif +int net_init_vhost_user(const NetClientOptions *opts, const char *name, + NetClientState *peer); + #endif /* QEMU_NET_CLIENTS_H */ @@ -322,6 +322,7 @@ void net_hub_check_clients(void) case NET_CLIENT_OPTIONS_KIND_TAP: case NET_CLIENT_OPTIONS_KIND_SOCKET: case NET_CLIENT_OPTIONS_KIND_VDE: + case NET_CLIENT_OPTIONS_KIND_VHOST_USER: has_host_dev = 1; break; default: @@ -62,6 +62,7 @@ const char *host_net_devices[] = { #ifdef CONFIG_VDE "vde", #endif + "vhost-user", NULL, }; @@ -802,6 +803,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])( [NET_CLIENT_OPTIONS_KIND_BRIDGE] = net_init_bridge, #endif [NET_CLIENT_OPTIONS_KIND_HUBPORT] = net_init_hubport, +#ifdef CONFIG_VHOST_NET_USED + [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user, +#endif }; @@ -835,6 +839,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) case NET_CLIENT_OPTIONS_KIND_BRIDGE: #endif case NET_CLIENT_OPTIONS_KIND_HUBPORT: +#ifdef CONFIG_VHOST_NET_USED + case NET_CLIENT_OPTIONS_KIND_VHOST_USER: +#endif break; default: @@ -594,6 +594,7 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, int vnet_hdr, int fd) { TAPState *s; + int vhostfd; s = net_tap_fd_init(peer, model, name, fd, vnet_hdr); if (!s) { @@ -624,7 +625,11 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, if (tap->has_vhost ? 
tap->vhost : vhostfdname || (tap->has_vhostforce && tap->vhostforce)) { - int vhostfd; + VhostNetOptions options; + + options.backend_type = VHOST_BACKEND_TYPE_KERNEL; + options.net_backend = &s->nc; + options.force = tap->has_vhostforce && tap->vhostforce; if (tap->has_vhostfd || tap->has_vhostfds) { vhostfd = monitor_handle_fd_param(cur_mon, vhostfdname); @@ -632,11 +637,16 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, return -1; } } else { - vhostfd = -1; + vhostfd = open("/dev/vhost-net", O_RDWR); + if (vhostfd < 0) { + error_report("tap: open vhost char device failed: %s", + strerror(errno)); + return -1; + } } + options.opaque = (void *)(uintptr_t)vhostfd; - s->vhost_net = vhost_net_init(&s->nc, vhostfd, - tap->has_vhostforce && tap->vhostforce); + s->vhost_net = vhost_net_init(&options); if (!s->vhost_net) { error_report("vhost-net requested but could not be initialized"); return -1; diff --git a/net/vhost-user.c b/net/vhost-user.c new file mode 100644 index 0000000000..24e050c772 --- /dev/null +++ b/net/vhost-user.c @@ -0,0 +1,258 @@ +/* + * vhost-user.c + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "clients.h" +#include "net/vhost_net.h" +#include "net/vhost-user.h" +#include "sysemu/char.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" + +typedef struct VhostUserState { + NetClientState nc; + CharDriverState *chr; + bool vhostforce; + VHostNetState *vhost_net; +} VhostUserState; + +typedef struct VhostUserChardevProps { + bool is_socket; + bool is_unix; + bool is_server; +} VhostUserChardevProps; + +VHostNetState *vhost_user_get_vhost_net(NetClientState *nc) +{ + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + return s->vhost_net; +} + +static int vhost_user_running(VhostUserState *s) +{ + return (s->vhost_net) ? 1 : 0; +} + +static int vhost_user_start(VhostUserState *s) +{ + VhostNetOptions options; + + if (vhost_user_running(s)) { + return 0; + } + + options.backend_type = VHOST_BACKEND_TYPE_USER; + options.net_backend = &s->nc; + options.opaque = s->chr; + options.force = s->vhostforce; + + s->vhost_net = vhost_net_init(&options); + + return vhost_user_running(s) ? 
0 : -1; +} + +static void vhost_user_stop(VhostUserState *s) +{ + if (vhost_user_running(s)) { + vhost_net_cleanup(s->vhost_net); + } + + s->vhost_net = 0; +} + +static void vhost_user_cleanup(NetClientState *nc) +{ + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); + + vhost_user_stop(s); + qemu_purge_queued_packets(nc); +} + +static bool vhost_user_has_vnet_hdr(NetClientState *nc) +{ + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + return true; +} + +static bool vhost_user_has_ufo(NetClientState *nc) +{ + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + return true; +} + +static NetClientInfo net_vhost_user_info = { + .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER, + .size = sizeof(VhostUserState), + .cleanup = vhost_user_cleanup, + .has_vnet_hdr = vhost_user_has_vnet_hdr, + .has_ufo = vhost_user_has_ufo, +}; + +static void net_vhost_link_down(VhostUserState *s, bool link_down) +{ + s->nc.link_down = link_down; + + if (s->nc.peer) { + s->nc.peer->link_down = link_down; + } + + if (s->nc.info->link_status_changed) { + s->nc.info->link_status_changed(&s->nc); + } + + if (s->nc.peer && s->nc.peer->info->link_status_changed) { + s->nc.peer->info->link_status_changed(s->nc.peer); + } +} + +static void net_vhost_user_event(void *opaque, int event) +{ + VhostUserState *s = opaque; + + switch (event) { + case CHR_EVENT_OPENED: + vhost_user_start(s); + net_vhost_link_down(s, false); + error_report("chardev \"%s\" went up\n", s->chr->label); + break; + case CHR_EVENT_CLOSED: + net_vhost_link_down(s, true); + vhost_user_stop(s); + error_report("chardev \"%s\" went down\n", s->chr->label); + break; + } +} + +static int net_vhost_user_init(NetClientState *peer, const char *device, + const char *name, CharDriverState *chr, + bool vhostforce) +{ + NetClientState *nc; + VhostUserState *s; + + nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); + + snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s", + chr->label); 
+ + s = DO_UPCAST(VhostUserState, nc, nc); + + /* We don't provide a receive callback */ + s->nc.receive_disabled = 1; + s->chr = chr; + s->vhostforce = vhostforce; + + qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s); + + return 0; +} + +static int net_vhost_chardev_opts(const char *name, const char *value, + void *opaque) +{ + VhostUserChardevProps *props = opaque; + + if (strcmp(name, "backend") == 0 && strcmp(value, "socket") == 0) { + props->is_socket = true; + } else if (strcmp(name, "path") == 0) { + props->is_unix = true; + } else if (strcmp(name, "server") == 0) { + props->is_server = true; + } else { + error_report("vhost-user does not support a chardev" + " with the following option:\n %s = %s", + name, value); + return -1; + } + return 0; +} + +static CharDriverState *net_vhost_parse_chardev(const NetdevVhostUserOptions *opts) +{ + CharDriverState *chr = qemu_chr_find(opts->chardev); + VhostUserChardevProps props; + + if (chr == NULL) { + error_report("chardev \"%s\" not found", opts->chardev); + return NULL; + } + + /* inspect chardev opts */ + memset(&props, 0, sizeof(props)); + if (qemu_opt_foreach(chr->opts, net_vhost_chardev_opts, &props, true) != 0) { + return NULL; + } + + if (!props.is_socket || !props.is_unix) { + error_report("chardev \"%s\" is not a unix socket", + opts->chardev); + return NULL; + } + + qemu_chr_fe_claim_no_fail(chr); + + return chr; +} + +static int net_vhost_check_net(QemuOpts *opts, void *opaque) +{ + const char *name = opaque; + const char *driver, *netdev; + const char virtio_name[] = "virtio-net-"; + + driver = qemu_opt_get(opts, "driver"); + netdev = qemu_opt_get(opts, "netdev"); + + if (!driver || !netdev) { + return 0; + } + + if (strcmp(netdev, name) == 0 && + strncmp(driver, virtio_name, strlen(virtio_name)) != 0) { + error_report("vhost-user requires frontend driver virtio-net-*"); + return -1; + } + + return 0; +} + +int net_init_vhost_user(const NetClientOptions *opts, const char *name, + 
NetClientState *peer) +{ + const NetdevVhostUserOptions *vhost_user_opts; + CharDriverState *chr; + bool vhostforce; + + assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + vhost_user_opts = opts->vhost_user; + + chr = net_vhost_parse_chardev(vhost_user_opts); + if (!chr) { + error_report("No suitable chardev found"); + return -1; + } + + /* verify net frontend */ + if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net, + (char *)name, true) == -1) { + return -1; + } + + /* vhostforce for non-MSIX */ + if (vhost_user_opts->has_vhostforce) { + vhostforce = vhost_user_opts->vhostforce; + } else { + vhostforce = false; + } + + return net_vhost_user_init(peer, "vhost_user", name, chr, vhostforce); +} diff --git a/numa.c b/numa.c new file mode 100644 index 0000000000..e471afe04a --- /dev/null +++ b/numa.c @@ -0,0 +1,369 @@ +/* + * NUMA parameter parsing routines + * + * Copyright (c) 2014 Fujitsu Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "sysemu/sysemu.h"
+#include "exec/cpu-common.h"
+#include "qemu/bitmap.h"
+#include "qom/cpu.h"
+#include "qemu/error-report.h"
+#include "include/exec/cpu-common.h" /* for RAM_ADDR_FMT */
+#include "qapi-visit.h"
+#include "qapi/opts-visitor.h"
+#include "qapi/dealloc-visitor.h"
+#include "qapi/qmp/qerror.h"
+#include "hw/boards.h"
+#include "sysemu/hostmem.h"
+#include "qmp-commands.h"
+
+QemuOptsList qemu_numa_opts = {
+    .name = "numa",
+    .implied_opt_name = "type",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head),
+    .desc = { { 0 } } /* validated with OptsVisitor */
+};
+
+/* -1 until the first node has been parsed; after that it holds that node's
+ * has_memdev flag, and every later node must agree with it.
+ */
+static int have_memdevs = -1;
+
+/*
+ * Apply one parsed "-numa node" option to numa_info[].
+ *
+ * @node: the parsed option values
+ * @opts: the raw option group, consulted for the textual form of "mem"
+ * @errp: set when the option is rejected; numa_info[] may already have
+ *        been partially updated for this node in that case
+ */
+static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
+{
+    uint16_t nodenr;
+    uint16List *cpus = NULL;
+
+    if (node->has_nodeid) {
+        nodenr = node->nodeid;
+    } else {
+        nodenr = nb_numa_nodes;
+    }
+
+    if (nodenr >= MAX_NODES) {
+        error_setg(errp, "Max number of NUMA nodes reached: %"
+                   PRIu16, nodenr);
+        return;
+    }
+
+    for (cpus = node->cpus; cpus; cpus = cpus->next) {
+        /* node_cpu is scanned with MAX_CPUMASK_BITS bits elsewhere in this
+         * file, so the highest valid index is MAX_CPUMASK_BITS - 1.
+         */
+        if (cpus->value >= MAX_CPUMASK_BITS) {
+            error_setg(errp, "CPU number %" PRIu16 " is bigger than %d",
+                       cpus->value, MAX_CPUMASK_BITS - 1);
+            return;
+        }
+        bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
+    }
+
+    if (node->has_mem && node->has_memdev) {
+        error_setg(errp, "qemu: cannot specify both mem= and memdev=");
+        return;
+    }
+
+    if (have_memdevs == -1) {
+        have_memdevs = node->has_memdev;
+    }
+    if (node->has_memdev != have_memdevs) {
+        error_setg(errp, "qemu: memdev option must be specified for either "
+                   "all or no nodes");
+        return;
+    }
+
+    if (node->has_mem) {
+        uint64_t mem_size = node->mem;
+        const char *mem_str = qemu_opt_get(opts, "mem");
+        /* Fix up legacy suffix-less format */
+        if (g_ascii_isdigit(mem_str[strlen(mem_str) - 1])) {
+            mem_size <<= 20;
+        }
+        numa_info[nodenr].node_mem = mem_size;
+    }
+    if (node->has_memdev) {
+        Object *o;
+        o = object_resolve_path_type(node->memdev, TYPE_MEMORY_BACKEND, NULL);
+        if (!o) {
+            error_setg(errp, "memdev=%s is ambiguous", node->memdev);
+            return;
+        }
+
+        object_ref(o);
+        numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL);
+        numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
+    }
+}
+
+/*
+ * Parse one -numa option group and apply it.
+ *
+ * Returns 0 on success, or -1 after reporting the error.  The parsed
+ * NumaOptions is released on both paths; numa_node_parse() keeps no
+ * pointers into it.
+ */
+int numa_init_func(QemuOpts *opts, void *opaque)
+{
+    NumaOptions *object = NULL;
+    Error *err = NULL;
+
+    {
+        OptsVisitor *ov = opts_visitor_new(opts);
+        visit_type_NumaOptions(opts_get_visitor(ov), &object, NULL, &err);
+        opts_visitor_cleanup(ov);
+    }
+
+    if (err) {
+        goto out;
+    }
+
+    switch (object->kind) {
+    case NUMA_OPTIONS_KIND_NODE:
+        numa_node_parse(object->node, opts, &err);
+        if (err) {
+            goto out;
+        }
+        nb_numa_nodes++;
+        break;
+    default:
+        abort();
+    }
+
+out:
+    if (object) {
+        QapiDeallocVisitor *dv = qapi_dealloc_visitor_new();
+        visit_type_NumaOptions(qapi_dealloc_get_visitor(dv),
+                               &object, NULL, NULL);
+        qapi_dealloc_visitor_cleanup(dv);
+    }
+
+    if (err) {
+        qerror_report_err(err);
+        error_free(err);
+        return -1;
+    }
+
+    return 0;
+}
+
+void set_numa_nodes(void)
+{
+    if (nb_numa_nodes > 0) {
+        uint64_t numa_total;
+        int i;
+
+        if (nb_numa_nodes > MAX_NODES) {
+            nb_numa_nodes = MAX_NODES;
+        }
+
+        /* If no memory size is given for any node, assume the default case
+         * and distribute the available memory equally across all nodes
+         */
+        for (i = 0; i < nb_numa_nodes; i++) {
+            if (numa_info[i].node_mem != 0) {
+                break;
+            }
+        }
+        if (i == nb_numa_nodes) {
+            uint64_t usedmem = 0;
+
+            /* On Linux, each node's border has to be 8MB aligned,
+             * the final node gets the rest.
+ */ + for (i = 0; i < nb_numa_nodes - 1; i++) { + numa_info[i].node_mem = (ram_size / nb_numa_nodes) & + ~((1 << 23UL) - 1); + usedmem += numa_info[i].node_mem; + } + numa_info[i].node_mem = ram_size - usedmem; + } + + numa_total = 0; + for (i = 0; i < nb_numa_nodes; i++) { + numa_total += numa_info[i].node_mem; + } + if (numa_total != ram_size) { + error_report("total memory for NUMA nodes (%" PRIu64 ")" + " should equal RAM size (" RAM_ADDR_FMT ")", + numa_total, ram_size); + exit(1); + } + + for (i = 0; i < nb_numa_nodes; i++) { + if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) { + break; + } + } + /* assigning the VCPUs round-robin is easier to implement, guest OSes + * must cope with this anyway, because there are BIOSes out there in + * real machines which also use this scheme. + */ + if (i == nb_numa_nodes) { + for (i = 0; i < max_cpus; i++) { + set_bit(i, numa_info[i % nb_numa_nodes].node_cpu); + } + } + } +} + +void set_numa_modes(void) +{ + CPUState *cpu; + int i; + + CPU_FOREACH(cpu) { + for (i = 0; i < nb_numa_nodes; i++) { + if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) { + cpu->numa_node = i; + } + } + } +} + +static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, + const char *name, + uint64_t ram_size) +{ + if (mem_path) { +#ifdef __linux__ + Error *err = NULL; + memory_region_init_ram_from_file(mr, owner, name, ram_size, false, + mem_path, &err); + + /* Legacy behavior: if allocation failed, fall back to + * regular RAM allocation. 
+         */
+        if (err) {
+            qerror_report_err(err);
+            error_free(err);
+            memory_region_init_ram(mr, owner, name, ram_size);
+        }
+#else
+        fprintf(stderr, "-mem-path not supported on this host\n");
+        exit(1);
+#endif
+    } else {
+        memory_region_init_ram(mr, owner, name, ram_size);
+    }
+    vmstate_register_ram_global(mr);
+}
+
+void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
+                                          const char *name,
+                                          uint64_t ram_size)
+{
+    uint64_t addr = 0;
+    int i;
+
+    if (nb_numa_nodes == 0 || !have_memdevs) {
+        allocate_system_memory_nonnuma(mr, owner, name, ram_size);
+        return;
+    }
+
+    memory_region_init(mr, owner, name, ram_size);
+    for (i = 0; i < MAX_NODES; i++) {
+        Error *local_err = NULL;
+        uint64_t size = numa_info[i].node_mem;
+        HostMemoryBackend *backend = numa_info[i].node_memdev;
+        if (!backend) {
+            continue;
+        }
+        MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err);
+        if (local_err) {
+            qerror_report_err(local_err);
+            exit(1);
+        }
+
+        memory_region_add_subregion(mr, addr, seg);
+        vmstate_register_ram_global(seg);
+        addr += size;
+    }
+}
+
+/* Free one MemdevList link, the Memdev it owns and that Memdev's
+ * host_nodes list.  Accepts NULL and partially built links.
+ */
+static void memdev_list_entry_free(MemdevList *m)
+{
+    uint16List *node, *next;
+
+    if (!m) {
+        return;
+    }
+    if (m->value) {
+        for (node = m->value->host_nodes; node; node = next) {
+            next = node->next;
+            g_free(node);
+        }
+        g_free(m->value);
+    }
+    g_free(m);
+}
+
+/*
+ * object_child_foreach() callback: if @obj is a memory backend, read its
+ * properties into a new Memdev and push it on the front of the list that
+ * @opaque (a MemdevList **) points to.
+ *
+ * Returns 0 to continue the walk, or -1 if a property could not be read.
+ * On failure the half-built entry and the Error are released here; entries
+ * already on the list are left for the caller to free.
+ */
+static int query_memdev(Object *obj, void *opaque)
+{
+    MemdevList **list = opaque;
+    MemdevList *m = NULL;
+    Error *err = NULL;
+
+    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+        m = g_malloc0(sizeof(*m));
+
+        m->value = g_malloc0(sizeof(*m->value));
+
+        m->value->size = object_property_get_int(obj, "size",
+                                                 &err);
+        if (err) {
+            goto error;
+        }
+
+        m->value->merge = object_property_get_bool(obj, "merge",
+                                                   &err);
+        if (err) {
+            goto error;
+        }
+
+        m->value->dump = object_property_get_bool(obj, "dump",
+                                                  &err);
+        if (err) {
+            goto error;
+        }
+
+        m->value->prealloc = object_property_get_bool(obj,
+                                                      "prealloc", &err);
+        if (err) {
+            goto error;
+        }
+
+        m->value->policy = object_property_get_enum(obj,
+                                                    "policy",
+                                                    HostMemPolicy_lookup,
+                                                    &err);
+        if (err) {
+            goto error;
+        }
+
+        object_property_get_uint16List(obj, "host-nodes",
+                                       &m->value->host_nodes, &err);
+        if (err) {
+            goto error;
+        }
+
+        m->next = *list;
+        *list = m;
+    }
+
+    return 0;
+error:
+    error_free(err);
+    memdev_list_entry_free(m);
+    return -1;
+}
+
+/*
+ * QMP query-memdev: collect a Memdev for every memory backend found under
+ * "/objects".  Returns NULL when that container does not exist or when a
+ * backend property could not be read; @errp is not set in either case.
+ */
+MemdevList *qmp_query_memdev(Error **errp)
+{
+    Object *obj;
+    MemdevList *list = NULL, *m;
+
+    obj = object_resolve_path("/objects", NULL);
+    if (obj == NULL) {
+        return NULL;
+    }
+
+    if (object_child_foreach(obj, query_memdev, &list) != 0) {
+        goto error;
+    }
+
+    return list;
+
+error:
+    while (list) {
+        m = list;
+        list = list->next;
+        memdev_list_entry_free(m);
+    }
+    return NULL;
+}
diff --git a/qapi-schema.json b/qapi-schema.json
index dc2abe479e..98350048f6 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2069,6 +2069,22 @@
     '*devname': 'str' } }
 
 ##
+# @NetdevVhostUserOptions
+#
+# Vhost-user network backend
+#
+# @chardev: name of a unix socket chardev
+#
+# @vhostforce: #optional vhost on for non-MSIX virtio guests (default: false).
+#
+# Since 2.1
+##
+{ 'type': 'NetdevVhostUserOptions',
+  'data': {
+    'chardev': 'str',
+    '*vhostforce': 'bool' } }
+
+##
 # @NetClientOptions
 #
 # A discriminated record of network device traits.
@@ -2086,7 +2102,8 @@
     'dump': 'NetdevDumpOptions',
     'bridge': 'NetdevBridgeOptions',
     'hubport': 'NetdevHubPortOptions',
-    'netmap': 'NetdevNetmapOptions' } }
+    'netmap': 'NetdevNetmapOptions',
+    'vhost-user': 'NetdevVhostUserOptions' } }
 
 ##
 # @NetLegacy
@@ -3080,3 +3097,192 @@
     'btn' : 'InputBtnEvent',
     'rel' : 'InputMoveEvent',
     'abs' : 'InputMoveEvent' } }
+
+##
+# @NumaOptions
+#
+# A discriminated record of NUMA options. (for OptsVisitor)
+#
+# Since 2.1
+##
+{ 'union': 'NumaOptions',
+  'data': {
+    'node': 'NumaNodeOptions' }}
+
+##
+# @NumaNodeOptions
+#
+# Create a guest NUMA node. (for OptsVisitor)
+#
+# @nodeid: #optional NUMA node ID (increase by 1 from 0 if omitted)
+#
+# @cpus: #optional VCPUs belonging to this node (assign VCPUS round-robin
+# if omitted)
+#
+# @mem: #optional memory size of this node; mutually exclusive with @memdev.
+# Equally divide total memory among nodes if both @mem and @memdev are
+# omitted.
+# +# @memdev: #optional memory backend object. If specified for one node, +# it must be specified for all nodes. +# +# Since: 2.1 +## +{ 'type': 'NumaNodeOptions', + 'data': { + '*nodeid': 'uint16', + '*cpus': ['uint16'], + '*mem': 'size', + '*memdev': 'str' }} + +## +# @HostMemPolicy +# +# Host memory policy types +# +# @default: restore default policy, remove any nondefault policy +# +# @preferred: set the preferred host nodes for allocation +# +# @bind: a strict policy that restricts memory allocation to the +# host nodes specified +# +# @interleave: memory allocations are interleaved across the set +# of host nodes specified +# +# Since 2.1 +## +{ 'enum': 'HostMemPolicy', + 'data': [ 'default', 'preferred', 'bind', 'interleave' ] } + +## +# @Memdev: +# +# Information of memory device +# +# @size: memory device size +# +# @merge: enables or disables memory merge support +# +# @dump: includes memory device's memory in a core dump or not +# +# @prealloc: enables or disables memory preallocation +# +# @host-nodes: host nodes for its memory policy +# +# @policy: memory policy of memory device +# +# Since: 2.1 +## + +{ 'type': 'Memdev', + 'data': { + 'size': 'size', + 'merge': 'bool', + 'dump': 'bool', + 'prealloc': 'bool', + 'host-nodes': ['uint16'], + 'policy': 'HostMemPolicy' }} + +## +# @query-memdev: +# +# Returns information for all memory devices. +# +# Returns: a list of @Memdev. 
+# +# Since: 2.1 +## +{ 'command': 'query-memdev', 'returns': ['Memdev'] } + +## +# @PCDIMMDeviceInfo: +# +# PCDIMMDevice state information +# +# @id: #optional device's ID +# +# @addr: physical address, where device is mapped +# +# @size: size of memory that the device provides +# +# @slot: slot number at which device is plugged in +# +# @node: NUMA node number where device is plugged in +# +# @memdev: memory backend linked with device +# +# @hotplugged: true if device was hotplugged +# +# @hotpluggable: true if device could be added/removed while machine is running +# +# Since: 2.1 +## +{ 'type': 'PCDIMMDeviceInfo', + 'data': { '*id': 'str', + 'addr': 'int', + 'size': 'int', + 'slot': 'int', + 'node': 'int', + 'memdev': 'str', + 'hotplugged': 'bool', + 'hotpluggable': 'bool' + } +} + +## +# @MemoryDeviceInfo: +# +# Union containing information about a memory device +# +# Since: 2.1 +## +{ 'union': 'MemoryDeviceInfo', 'data': {'dimm': 'PCDIMMDeviceInfo'} } + +## +# @query-memory-devices +# +# Lists available memory devices and their state +# +# Since: 2.1 +## +{ 'command': 'query-memory-devices', 'returns': ['MemoryDeviceInfo'] } + +## @ACPISlotType +# +# @DIMM: memory slot +# +{ 'enum': 'ACPISlotType', 'data': [ 'DIMM' ] } + +## @ACPIOSTInfo +# +# OSPM Status Indication for a device +# For description of possible values of @source and @status fields +# see "_OST (OSPM Status Indication)" chapter of ACPI5.0 spec.
+# +# @device: #optional device ID associated with slot +# +# @slot: slot ID, unique per slot of a given @slot-type +# +# @slot-type: type of the slot +# +# @source: an integer containing the source event +# +# @status: an integer containing the status code +# +# Since: 2.1 +## +{ 'type': 'ACPIOSTInfo', + 'data' : { '*device': 'str', + 'slot': 'str', + 'slot-type': 'ACPISlotType', + 'source': 'int', + 'status': 'int' } } + +## +# @query-acpi-ospm-status +# +# Lists ACPI OSPM status of ACPI device objects, +# which might be reported via _OST method +# +# Since: 2.1 +## +{ 'command': 'query-acpi-ospm-status', 'returns': ['ACPIOSTInfo'] } diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c index 5780944792..d8a8db02ed 100644 --- a/qapi/string-input-visitor.c +++ b/qapi/string-input-visitor.c @@ -15,31 +15,210 @@ #include "qapi/visitor-impl.h" #include "qapi/qmp/qerror.h" #include "qemu/option.h" +#include "qemu/queue.h" +#include "qemu/range.h" + struct StringInputVisitor { Visitor visitor; + + bool head; + + GList *ranges; + GList *cur_range; + int64_t cur; + const char *string; }; +static void free_range(void *range, void *dummy) +{ + g_free(range); +} + +static void parse_str(StringInputVisitor *siv, Error **errp) +{ + char *str = (char *) siv->string; + long long start, end; + Range *cur; + char *endptr; + + if (siv->ranges) { + return; + } + + do { + errno = 0; + start = strtoll(str, &endptr, 0); + if (errno == 0 && endptr > str) { + if (*endptr == '\0') { + cur = g_malloc0(sizeof(*cur)); + cur->begin = start; + cur->end = start + 1; + siv->ranges = g_list_insert_sorted_merged(siv->ranges, cur, + range_compare); + cur = NULL; + str = NULL; + } else if (*endptr == '-') { + str = endptr + 1; + errno = 0; + end = strtoll(str, &endptr, 0); + if (errno == 0 && endptr > str && start <= end && + (start > INT64_MAX - 65536 || + end < start + 65536)) { + if (*endptr == '\0') { + cur = g_malloc0(sizeof(*cur)); + cur->begin = start; + cur->end = end + 1; 
+ siv->ranges = + g_list_insert_sorted_merged(siv->ranges, + cur, + range_compare); + cur = NULL; + str = NULL; + } else if (*endptr == ',') { + str = endptr + 1; + cur = g_malloc0(sizeof(*cur)); + cur->begin = start; + cur->end = end + 1; + siv->ranges = + g_list_insert_sorted_merged(siv->ranges, + cur, + range_compare); + cur = NULL; + } else { + goto error; + } + } else { + goto error; + } + } else if (*endptr == ',') { + str = endptr + 1; + cur = g_malloc0(sizeof(*cur)); + cur->begin = start; + cur->end = start + 1; + siv->ranges = g_list_insert_sorted_merged(siv->ranges, + cur, + range_compare); + cur = NULL; + } else { + goto error; + } + } else { + goto error; + } + } while (str); + + return; +error: + g_list_foreach(siv->ranges, free_range, NULL); + g_list_free(siv->ranges); + siv->ranges = NULL; +} + +static void +start_list(Visitor *v, const char *name, Error **errp) +{ + StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v); + + parse_str(siv, errp); + + siv->cur_range = g_list_first(siv->ranges); + if (siv->cur_range) { + Range *r = siv->cur_range->data; + if (r) { + siv->cur = r->begin; + } + } +} + +static GenericList * +next_list(Visitor *v, GenericList **list, Error **errp) +{ + StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v); + GenericList **link; + Range *r; + + if (!siv->ranges || !siv->cur_range) { + return NULL; + } + + r = siv->cur_range->data; + if (!r) { + return NULL; + } + + if (siv->cur < r->begin || siv->cur >= r->end) { + siv->cur_range = g_list_next(siv->cur_range); + if (!siv->cur_range) { + return NULL; + } + r = siv->cur_range->data; + if (!r) { + return NULL; + } + siv->cur = r->begin; + } + + if (siv->head) { + link = list; + siv->head = false; + } else { + link = &(*list)->next; + } + + *link = g_malloc0(sizeof **link); + return *link; +} + +static void +end_list(Visitor *v, Error **errp) +{ + StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v); + siv->head = true; +} + static 
void parse_type_int(Visitor *v, int64_t *obj, const char *name, Error **errp) { StringInputVisitor *siv = DO_UPCAST(StringInputVisitor, visitor, v); - char *endp = (char *) siv->string; - long long val; - errno = 0; - if (siv->string) { - val = strtoll(siv->string, &endp, 0); - } - if (!siv->string || errno || endp == siv->string || *endp) { + if (!siv->string) { error_set(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null", "integer"); return; } - *obj = val; + parse_str(siv, errp); + + if (!siv->ranges) { + goto error; + } + + if (!siv->cur_range) { + Range *r; + + siv->cur_range = g_list_first(siv->ranges); + if (!siv->cur_range) { + goto error; + } + + r = siv->cur_range->data; + if (!r) { + goto error; + } + + siv->cur = r->begin; + } + + *obj = siv->cur; + siv->cur++; + return; + +error: + error_set(errp, QERR_INVALID_PARAMETER_VALUE, name, + "an int64 value or range"); } static void parse_type_size(Visitor *v, uint64_t *obj, const char *name, @@ -140,6 +319,8 @@ Visitor *string_input_get_visitor(StringInputVisitor *v) void string_input_visitor_cleanup(StringInputVisitor *v) { + g_list_foreach(v->ranges, free_range, NULL); + g_list_free(v->ranges); g_free(v); } @@ -155,8 +336,12 @@ StringInputVisitor *string_input_visitor_new(const char *str) v->visitor.type_bool = parse_type_bool; v->visitor.type_str = parse_type_str; v->visitor.type_number = parse_type_number; + v->visitor.start_list = start_list; + v->visitor.next_list = next_list; + v->visitor.end_list = end_list; v->visitor.optional = parse_optional; v->string = str; + v->head = true; return v; } diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c index fb1d2e806d..e9aca3bfdc 100644 --- a/qapi/string-output-visitor.c +++ b/qapi/string-output-visitor.c @@ -16,32 +16,181 @@ #include "qapi/qmp/qerror.h" #include "qemu/host-utils.h" #include <math.h> +#include "qemu/range.h" + +enum ListMode { + LM_NONE, /* not traversing a list of repeated options */ + LM_STARTED, /* start_list() 
succeeded */ + + LM_IN_PROGRESS, /* next_list() has been called. + * + * Generating the next list link will consume the most + * recently parsed QemuOpt instance of the repeated + * option. + * + * Parsing a value into the list link will examine the + * next QemuOpt instance of the repeated option, and + * possibly enter LM_SIGNED_INTERVAL or + * LM_UNSIGNED_INTERVAL. + */ + + LM_SIGNED_INTERVAL, /* next_list() has been called. + * + * Generating the next list link will consume the most + * recently stored element from the signed interval, + * parsed from the most recent QemuOpt instance of the + * repeated option. This may consume QemuOpt itself + * and return to LM_IN_PROGRESS. + * + * Parsing a value into the list link will store the + * next element of the signed interval. + */ + + LM_UNSIGNED_INTERVAL,/* Same as above, only for an unsigned interval. */ + + LM_END +}; + +typedef enum ListMode ListMode; struct StringOutputVisitor { Visitor visitor; bool human; - char *string; + GString *string; + bool head; + ListMode list_mode; + union { + int64_t s; + uint64_t u; + } range_start, range_end; + GList *ranges; }; static void string_output_set(StringOutputVisitor *sov, char *string) { - g_free(sov->string); - sov->string = string; + if (sov->string) { + g_string_free(sov->string, true); + } + sov->string = g_string_new(string); + g_free(string); +} + +static void string_output_append(StringOutputVisitor *sov, int64_t a) +{ + Range *r = g_malloc0(sizeof(*r)); + r->begin = a; + r->end = a + 1; + sov->ranges = g_list_insert_sorted_merged(sov->ranges, r, range_compare); +} + +static void string_output_append_range(StringOutputVisitor *sov, + int64_t s, int64_t e) +{ + Range *r = g_malloc0(sizeof(*r)); + r->begin = s; + r->end = e + 1; + sov->ranges = g_list_insert_sorted_merged(sov->ranges, r, range_compare); +} + +static void format_string(StringOutputVisitor *sov, Range *r, bool next, + bool human) +{ + if (r->end - r->begin > 1) { + if (human) { + 
g_string_append_printf(sov->string, "0x%" PRIx64 "-%" PRIx64, + r->begin, r->end - 1); + + } else { + g_string_append_printf(sov->string, "%" PRId64 "-%" PRId64, + r->begin, r->end - 1); + } + } else { + if (human) { + g_string_append_printf(sov->string, "0x%" PRIx64, r->begin); + } else { + g_string_append_printf(sov->string, "%" PRId64, r->begin); + } + } + if (next) { + g_string_append(sov->string, ","); + } } static void print_type_int(Visitor *v, int64_t *obj, const char *name, Error **errp) { StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v); - char *out; + GList *l; + + switch (sov->list_mode) { + case LM_NONE: + string_output_append(sov, *obj); + break; + + case LM_STARTED: + sov->range_start.s = *obj; + sov->range_end.s = *obj; + sov->list_mode = LM_IN_PROGRESS; + return; + + case LM_IN_PROGRESS: + if (sov->range_end.s + 1 == *obj) { + sov->range_end.s++; + } else { + if (sov->range_start.s == sov->range_end.s) { + string_output_append(sov, sov->range_end.s); + } else { + assert(sov->range_start.s < sov->range_end.s); + string_output_append_range(sov, sov->range_start.s, + sov->range_end.s); + } + + sov->range_start.s = *obj; + sov->range_end.s = *obj; + } + return; + + case LM_END: + if (sov->range_end.s + 1 == *obj) { + sov->range_end.s++; + assert(sov->range_start.s < sov->range_end.s); + string_output_append_range(sov, sov->range_start.s, + sov->range_end.s); + } else { + if (sov->range_start.s == sov->range_end.s) { + string_output_append(sov, sov->range_end.s); + } else { + assert(sov->range_start.s < sov->range_end.s); + + string_output_append_range(sov, sov->range_start.s, + sov->range_end.s); + } + string_output_append(sov, *obj); + } + break; + + default: + abort(); + } + + l = sov->ranges; + while (l) { + Range *r = l->data; + format_string(sov, r, l->next != NULL, false); + l = l->next; + } if (sov->human) { - out = g_strdup_printf("%lld (%#llx)", (long long) *obj, (long long) *obj); - } else { - out = 
g_strdup_printf("%lld", (long long) *obj); + l = sov->ranges; + g_string_append(sov->string, " ("); + while (l) { + Range *r = l->data; + format_string(sov, r, l->next != NULL, true); + l = l->next; + } + g_string_append(sov->string, ")"); } - string_output_set(sov, out); } static void print_type_size(Visitor *v, uint64_t *obj, const char *name, @@ -103,9 +252,61 @@ static void print_type_number(Visitor *v, double *obj, const char *name, string_output_set(sov, g_strdup_printf("%f", *obj)); } +static void +start_list(Visitor *v, const char *name, Error **errp) +{ + StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v); + + /* we can't traverse a list in a list */ + assert(sov->list_mode == LM_NONE); + sov->list_mode = LM_STARTED; + sov->head = true; +} + +static GenericList * +next_list(Visitor *v, GenericList **list, Error **errp) +{ + StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v); + GenericList *ret = NULL; + if (*list) { + if (sov->head) { + ret = *list; + } else { + ret = (*list)->next; + } + + if (sov->head) { + if (ret && ret->next == NULL) { + sov->list_mode = LM_NONE; + } + sov->head = false; + } else { + if (ret && ret->next == NULL) { + sov->list_mode = LM_END; + } + } + } + + return ret; +} + +static void +end_list(Visitor *v, Error **errp) +{ + StringOutputVisitor *sov = DO_UPCAST(StringOutputVisitor, visitor, v); + + assert(sov->list_mode == LM_STARTED || + sov->list_mode == LM_END || + sov->list_mode == LM_NONE || + sov->list_mode == LM_IN_PROGRESS); + sov->list_mode = LM_NONE; + sov->head = true; + +} + char *string_output_get_string(StringOutputVisitor *sov) { - char *string = sov->string; + char *string = g_string_free(sov->string, false); sov->string = NULL; return string; } @@ -115,9 +316,19 @@ Visitor *string_output_get_visitor(StringOutputVisitor *sov) return &sov->visitor; } +static void free_range(void *range, void *dummy) +{ + g_free(range); +} + void string_output_visitor_cleanup(StringOutputVisitor 
*sov)
 {
-    g_free(sov->string);
+    if (sov->string) {
+        g_string_free(sov->string, true);
+    }
+
+    g_list_foreach(sov->ranges, free_range, NULL);
+    g_list_free(sov->ranges);
     g_free(sov);
 }
 
@@ -127,6 +338,7 @@ StringOutputVisitor *string_output_visitor_new(bool human)
 
     v = g_malloc0(sizeof(*v));
 
+    v->string = g_string_new(NULL);
     v->human = human;
     v->visitor.type_enum = output_type_enum;
     v->visitor.type_int = print_type_int;
@@ -134,6 +346,9 @@ StringOutputVisitor *string_output_visitor_new(bool human)
     v->visitor.type_bool = print_type_bool;
     v->visitor.type_str = print_type_str;
     v->visitor.type_number = print_type_number;
+    v->visitor.start_list = start_list;
+    v->visitor.next_list = next_list;
+    v->visitor.end_list = end_list;
 
     return v;
 }
diff --git a/qemu-char.c b/qemu-char.c
index f918f90972..b3bd3b5af4 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -84,6 +84,7 @@
 #include "ui/qemu-spice.h"
 
 #define READ_BUF_LEN 4096
+#define READ_RETRIES 10
 
 /***********************************************************/
 /* character device */
@@ -145,6 +146,41 @@ int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len)
     return offset;
 }
 
+/*
+ * Read up to @len bytes into @buf through the backend's chr_sync_read hook.
+ *
+ * Each read is retried (after a short sleep) while it fails with EAGAIN.
+ * The loop ends when @len bytes have arrived, a read returns 0, or the
+ * retry budget of READ_RETRIES is used up after a partial read.
+ *
+ * Returns the number of bytes read (possibly fewer than @len), 0 if the
+ * backend has no chr_sync_read hook, or the negative result of a failed
+ * read.
+ */
+int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len)
+{
+    int offset = 0, counter = READ_RETRIES;
+    int res;
+
+    if (!s->chr_sync_read) {
+        return 0;
+    }
+
+    while (offset < len) {
+        do {
+            res = s->chr_sync_read(s, buf + offset, len - offset);
+            if (res == -1 && errno == EAGAIN) {
+                g_usleep(100);
+            }
+        } while (res == -1 && errno == EAGAIN);
+
+        if (res == 0) {
+            break;
+        }
+
+        if (res < 0) {
+            return res;
+        }
+
+        offset += res;
+
+        if (!counter--) {
+            break;
+        }
+    }
+
+    return offset;
+}
+
 int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg)
 {
     if (!s->chr_ioctl)
@@ -168,7 +204,18 @@ void qemu_chr_be_write(CharDriverState *s, uint8_t *buf, int len)
 
 int qemu_chr_fe_get_msgfd(CharDriverState *s)
 {
-    return s->get_msgfd ? s->get_msgfd(s) : -1;
+    int fd;
+
+    /* qemu_chr_fe_get_msgfds() returns how many descriptors it stored, and
+     * 0 means fd was never written; only a count of exactly 1 makes fd
+     * valid.
+     */
+    return (qemu_chr_fe_get_msgfds(s, &fd, 1) == 1) ? fd : -1;
+}
+
+int qemu_chr_fe_get_msgfds(CharDriverState *s, int *fds, int len)
+{
+    return s->get_msgfds ? s->get_msgfds(s, fds, len) : -1;
+}
+
+int qemu_chr_fe_set_msgfds(CharDriverState *s, int *fds, int num)
+{
+    return s->set_msgfds ? s->set_msgfds(s, fds, num) : -1;
 }
 
 int qemu_chr_add_client(CharDriverState *s, int fd)
@@ -2296,16 +2343,73 @@ typedef struct {
     int do_telnetopt;
     int do_nodelay;
     int is_unix;
-    int msgfd;
+    int *read_msgfds;
+    int read_msgfds_num;
+    int *write_msgfds;
+    int write_msgfds_num;
 } TCPCharDriver;
 
 static gboolean tcp_chr_accept(GIOChannel *chan, GIOCondition cond, void *opaque);
 
+#ifndef _WIN32
+static int unix_send_msgfds(CharDriverState *chr, const uint8_t *buf, int len)
+{
+    TCPCharDriver *s = chr->opaque;
+    struct msghdr msgh;
+    struct iovec iov;
+    int r;
+
+    size_t fd_size = s->write_msgfds_num * sizeof(int);
+    char control[CMSG_SPACE(fd_size)];
+    struct cmsghdr *cmsg;
+
+    memset(&msgh, 0, sizeof(msgh));
+    memset(control, 0, sizeof(control));
+
+    /* set the payload */
+    iov.iov_base = (uint8_t *) buf;
+    iov.iov_len = len;
+
+    msgh.msg_iov = &iov;
+    msgh.msg_iovlen = 1;
+
+    msgh.msg_control = control;
+    msgh.msg_controllen = sizeof(control);
+
+    cmsg = CMSG_FIRSTHDR(&msgh);
+
+    cmsg->cmsg_len = CMSG_LEN(fd_size);
+    cmsg->cmsg_level = SOL_SOCKET;
+    cmsg->cmsg_type = SCM_RIGHTS;
+    memcpy(CMSG_DATA(cmsg), s->write_msgfds, fd_size);
+
+    do {
+        r = sendmsg(s->fd, &msgh, 0);
+    } while (r < 0 && errno == EINTR);
+
+    /* free the written msgfds, no matter what */
+    if (s->write_msgfds_num) {
+        g_free(s->write_msgfds);
+        s->write_msgfds = 0;
+        s->write_msgfds_num = 0;
+    }
+
+    return r;
+}
+#endif
+
 static int tcp_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
     TCPCharDriver *s = chr->opaque;
     if (s->connected) {
-        return io_channel_send(s->chan, buf, len);
+#ifndef _WIN32
+        if (s->is_unix && s->write_msgfds_num) {
+            return unix_send_msgfds(chr, buf, len);
+        } else
+#endif
+        {
+            return io_channel_send(s->chan, buf, len);
+        }
     }
else {
         /* XXX: indicate an error ? */
         return len;
@@ -2372,12 +2476,39 @@ static void tcp_chr_process_IAC_bytes(CharDriverState *chr,
     *size = j;
 }
 
-static int tcp_get_msgfd(CharDriverState *chr)
+/*
+ * Hand the descriptors received with the last message over to the caller.
+ *
+ * Copies at most @num descriptors into @fds and releases the stored array.
+ * Descriptors that do not fit are closed so they cannot leak.  Returns the
+ * number of descriptors copied.
+ */
+static int tcp_get_msgfds(CharDriverState *chr, int *fds, int num)
 {
     TCPCharDriver *s = chr->opaque;
-    int fd = s->msgfd;
-    s->msgfd = -1;
-    return fd;
+    int to_copy = (s->read_msgfds_num < num) ? s->read_msgfds_num : num;
+
+    if (to_copy) {
+        int i;
+
+        memcpy(fds, s->read_msgfds, to_copy * sizeof(int));
+
+        /* close the descriptors the caller had no room for */
+        for (i = to_copy; i < s->read_msgfds_num; i++) {
+            close(s->read_msgfds[i]);
+        }
+
+        g_free(s->read_msgfds);
+        s->read_msgfds = 0;
+        s->read_msgfds_num = 0;
+    }
+
+    return to_copy;
+}
+
+/*
+ * Queue @num descriptors from @fds to be sent with the next write.
+ *
+ * Any array queued earlier is dropped first.  The pointer is reset as well,
+ * so a call with @num == 0 leaves no stale pointer for the next call to
+ * free a second time.  Always returns 0.
+ */
+static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num)
+{
+    TCPCharDriver *s = chr->opaque;
+
+    /* clear old pending fd array */
+    g_free(s->write_msgfds);
+    s->write_msgfds = NULL;
+    s->write_msgfds_num = 0;
+
+    if (num > 0) {
+        s->write_msgfds = g_malloc(num * sizeof(int));
+        memcpy(s->write_msgfds, fds, num * sizeof(int));
+        s->write_msgfds_num = num;
+    }
+
+    return 0;
 }
 
 #ifndef _WIN32
@@ -2387,26 +2518,46 @@ static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg)
     struct cmsghdr *cmsg;
 
     for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
-        int fd;
+        int fd_size, i;
 
-        if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)) ||
+        if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) ||
             cmsg->cmsg_level != SOL_SOCKET ||
-            cmsg->cmsg_type != SCM_RIGHTS)
+            cmsg->cmsg_type != SCM_RIGHTS) {
             continue;
+        }
+
+        fd_size = cmsg->cmsg_len - CMSG_LEN(0);
 
-        fd = *((int *)CMSG_DATA(cmsg));
-        if (fd < 0)
+        if (!fd_size) {
             continue;
+        }
+
+        /* close and clean read_msgfds */
+        for (i = 0; i < s->read_msgfds_num; i++) {
+            close(s->read_msgfds[i]);
+        }
+
+        if (s->read_msgfds_num) {
+            g_free(s->read_msgfds);
+        }
 
-        /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
-        qemu_set_block(fd);
+        s->read_msgfds_num = fd_size / sizeof(int);
+        s->read_msgfds = g_malloc(fd_size);
+        memcpy(s->read_msgfds, CMSG_DATA(cmsg), fd_size);
 
-#ifndef MSG_CMSG_CLOEXEC
-        qemu_set_cloexec(fd);
-#endif
-        if (s->msgfd
!= -1) - close(s->msgfd); - s->msgfd = fd; + for (i = 0; i < s->read_msgfds_num; i++) { + int fd = s->read_msgfds[i]; + if (fd < 0) { + continue; + } + + /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */ + qemu_set_block(fd); + + #ifndef MSG_CMSG_CLOEXEC + qemu_set_cloexec(fd); + #endif + } } } @@ -2454,6 +2605,23 @@ static GSource *tcp_chr_add_watch(CharDriverState *chr, GIOCondition cond) return g_io_create_watch(s->chan, cond); } +static void tcp_chr_disconnect(CharDriverState *chr) +{ + TCPCharDriver *s = chr->opaque; + + s->connected = 0; + if (s->listen_chan) { + s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, + tcp_chr_accept, chr); + } + remove_fd_in_watch(chr); + g_io_channel_unref(s->chan); + s->chan = NULL; + closesocket(s->fd); + s->fd = -1; + qemu_chr_be_event(chr, CHR_EVENT_CLOSED); +} + static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) { CharDriverState *chr = opaque; @@ -2470,16 +2638,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) size = tcp_chr_recv(chr, (void *)buf, len); if (size == 0) { /* connection closed */ - s->connected = 0; - if (s->listen_chan) { - s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, tcp_chr_accept, chr); - } - remove_fd_in_watch(chr); - g_io_channel_unref(s->chan); - s->chan = NULL; - closesocket(s->fd); - s->fd = -1; - qemu_chr_be_event(chr, CHR_EVENT_CLOSED); + tcp_chr_disconnect(chr); } else if (size > 0) { if (s->do_telnetopt) tcp_chr_process_IAC_bytes(chr, s, buf, &size); @@ -2490,6 +2649,24 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) return TRUE; } +static int tcp_chr_sync_read(CharDriverState *chr, const uint8_t *buf, int len) +{ + TCPCharDriver *s = chr->opaque; + int size; + + if (!s->connected) { + return 0; + } + + size = tcp_chr_recv(chr, (void *) buf, len); + if (size == 0) { + /* connection closed */ + tcp_chr_disconnect(chr); + } + + return size; +} + #ifndef _WIN32 
CharDriverState *qemu_chr_open_eventfd(int eventfd) { @@ -2503,6 +2680,25 @@ CharDriverState *qemu_chr_open_eventfd(int eventfd) } #endif +static gboolean tcp_chr_chan_close(GIOChannel *channel, GIOCondition cond, + void *opaque) +{ + CharDriverState *chr = opaque; + + if (cond != G_IO_HUP) { + return FALSE; + } + + /* connection closed */ + tcp_chr_disconnect(chr); + if (chr->fd_hup_tag) { + g_source_remove(chr->fd_hup_tag); + chr->fd_hup_tag = 0; + } + + return TRUE; +} + static void tcp_chr_connect(void *opaque) { CharDriverState *chr = opaque; @@ -2512,6 +2708,8 @@ static void tcp_chr_connect(void *opaque) if (s->chan) { chr->fd_in_tag = io_add_watch_poll(s->chan, tcp_chr_read_poll, tcp_chr_read, chr); + chr->fd_hup_tag = g_io_add_watch(s->chan, G_IO_HUP, tcp_chr_chan_close, + chr); } qemu_chr_be_generic_open(chr); } @@ -2604,6 +2802,7 @@ static gboolean tcp_chr_accept(GIOChannel *channel, GIOCondition cond, void *opa static void tcp_chr_close(CharDriverState *chr) { TCPCharDriver *s = chr->opaque; + int i; if (s->fd >= 0) { remove_fd_in_watch(chr); if (s->chan) { @@ -2621,6 +2820,15 @@ static void tcp_chr_close(CharDriverState *chr) } closesocket(s->listen_fd); } + if (s->read_msgfds_num) { + for (i = 0; i < s->read_msgfds_num; i++) { + close(s->read_msgfds[i]); + } + g_free(s->read_msgfds); + } + if (s->write_msgfds_num) { + g_free(s->write_msgfds); + } g_free(s); qemu_chr_be_event(chr, CHR_EVENT_CLOSED); } @@ -2649,7 +2857,10 @@ static CharDriverState *qemu_chr_open_socket_fd(int fd, bool do_nodelay, s->connected = 0; s->fd = -1; s->listen_fd = -1; - s->msgfd = -1; + s->read_msgfds = 0; + s->read_msgfds_num = 0; + s->write_msgfds = 0; + s->write_msgfds_num = 0; chr->filename = g_malloc(256); switch (ss.ss_family) { @@ -2678,8 +2889,10 @@ static CharDriverState *qemu_chr_open_socket_fd(int fd, bool do_nodelay, chr->opaque = s; chr->chr_write = tcp_chr_write; + chr->chr_sync_read = tcp_chr_sync_read; chr->chr_close = tcp_chr_close; - chr->get_msgfd = 
tcp_get_msgfd; + chr->get_msgfds = tcp_get_msgfds; + chr->set_msgfds = tcp_set_msgfds; chr->chr_add_client = tcp_chr_add_client; chr->chr_add_watch = tcp_chr_add_watch; chr->chr_update_read_handler = tcp_chr_update_read_handler; diff --git a/qemu-options.hx b/qemu-options.hx index d0714c43a6..ca75760b27 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -95,12 +95,22 @@ specifies the maximum number of hotpluggable CPUs. ETEXI DEF("numa", HAS_ARG, QEMU_OPTION_numa, - "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL) + "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n" + "-numa node[,memdev=id][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL) STEXI -@item -numa @var{opts} +@item -numa node[,mem=@var{size}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}] +@item -numa node[,memdev=@var{id}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}] @findex -numa -Simulate a multi node NUMA system. If mem and cpus are omitted, resources -are split equally. +Simulate a multi node NUMA system. If @samp{mem}, @samp{memdev} +and @samp{cpus} are omitted, resources are split equally. Also, note +that the -@option{numa} option doesn't allocate any of the specified +resources. That is, it just assigns existing resources to NUMA nodes. This +means that one still has to use the @option{-m}, @option{-smp} options +to allocate RAM and VCPUs respectively, and possibly @option{-object} +to specify the memory backend for the @samp{memdev} suboption. + +@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, if one +node uses @samp{memdev}, all of them have to use it. ETEXI DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd, @@ -210,17 +220,20 @@ use is discouraged as it may be removed from future versions. 
ETEXI DEF("m", HAS_ARG, QEMU_OPTION_m, - "-m [size=]megs\n" + "-m[emory] [size=]megs[,slots=n,maxmem=size]\n" " configure guest RAM\n" " size: initial amount of guest memory (default: " - stringify(DEFAULT_RAM_SIZE) "MiB)\n", + stringify(DEFAULT_RAM_SIZE) "MiB)\n" + " slots: number of hotplug slots (default: none)\n" + " maxmem: maximum amount of guest memory (default: none)\n", QEMU_ARCH_ALL) STEXI @item -m [size=]@var{megs} @findex -m Set virtual RAM size to @var{megs} megabytes. Default is 128 MiB. Optionally, a suffix of ``M'' or ``G'' can be used to signify a value in megabytes or -gigabytes respectively. +gigabytes respectively. Optional pair @var{slots}, @var{maxmem} can be used +to set the number of hotpluggable memory slots and the maximum amount of memory. ETEXI DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath, @@ -1457,6 +1470,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, #ifdef CONFIG_NETMAP "netmap|" #endif + "vhost-user|" "socket|" "hubport],id=str[,option][,option][,...]\n", QEMU_ARCH_ALL) STEXI @@ -1788,6 +1802,23 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single netdev. @code{-net} and @code{-device} with parameter @option{vlan} create the required hub automatically. +@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off] + +Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should +be a unix domain socket backed one. The vhost-user uses a specifically defined +protocol to pass vhost ioctl replacement messages to an application on the other +end of the socket. On non-MSIX guests, the feature can be forced with +@var{vhostforce}. 
+ +Example: +@example +qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ + -numa node,memdev=mem \ + -chardev socket,id=chr0,path=/path/to/socket \ + -netdev type=vhost-user,id=net0,chardev=chr0 \ + -device virtio-net-pci,netdev=net0 +@end example + @item -net dump[,vlan=@var{n}][,file=@var{file}][,len=@var{len}] Dump network traffic on VLAN @var{n} to file @var{file} (@file{qemu-vlan0.pcap} by default). At most @var{len} bytes (64k by default) per packet are stored. The file format is diff --git a/qmp-commands.hx b/qmp-commands.hx index d6bb0f483f..e4a1c80434 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -3571,6 +3571,93 @@ Example: "format":"qcow2", "virtual-size":2048000 } - } }Â ] } + } } ] } EQMP + + { + .name = "query-memdev", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_memdev, + }, + +SQMP +query-memdev +------------ + +Show memory devices information. + + +Example (1): + +-> { "execute": "query-memdev" } +<- { "return": [ + { + "size": 536870912, + "merge": false, + "dump": true, + "prealloc": false, + "host-nodes": [0, 1], + "policy": "bind" + }, + { + "size": 536870912, + "merge": false, + "dump": true, + "prealloc": true, + "host-nodes": [2, 3], + "policy": "preferred" + } + ] + } + +EQMP + + { + .name = "query-memory-devices", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_memory_devices, + }, + +SQMP +@query-memory-devices +-------------------- + +Return a list of memory devices. 
+ +Example: +-> { "execute": "query-memory-devices" } +<- { "return": [ { "data": + { "addr": 5368709120, + "hotpluggable": true, + "hotplugged": true, + "id": "d1", + "memdev": "/objects/memX", + "node": 0, + "size": 1073741824, + "slot": 0}, + "type": "dimm" + } ] } +EQMP + + { + .name = "query-acpi-ospm-status", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_acpi_ospm_status, + }, + +SQMP +@query-acpi-ospm-status +-------------------- + +Return list of ACPIOSTInfo for devices that support status reporting +via ACPI _OST method. + +Example: +-> { "execute": "query-acpi-ospm-status" } +<- { "return": [ { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0}, + { "slot": "1", "slot-type": "DIMM", "source": 0, "status": 0}, + { "slot": "2", "slot-type": "DIMM", "source": 0, "status": 0}, + { "slot": "3", "slot-type": "DIMM", "source": 0, "status": 0} + ]} +EQMP @@ -28,6 +28,8 @@ #include "qapi/qmp-input-visitor.h" #include "hw/boards.h" #include "qom/object_interfaces.h" +#include "hw/mem/pc-dimm.h" +#include "hw/acpi/acpi_dev_interface.h" NameInfo *qmp_query_name(Error **errp) { @@ -540,7 +542,7 @@ void object_add(const char *type, const char *id, const QDict *qdict, klass = object_class_by_name(type); if (!klass) { - error_setg(errp, "invalid class name"); + error_setg(errp, "invalid object type: %s", type); return; } @@ -565,13 +567,18 @@ void object_add(const char *type, const char *id, const QDict *qdict, } } - user_creatable_complete(obj, &local_err); + object_property_add_child(container_get(object_get_root(), "/objects"), + id, obj, &local_err); if (local_err) { goto out; } - object_property_add_child(container_get(object_get_root(), "/objects"), - id, obj, &local_err); + user_creatable_complete(obj, &local_err); + if (local_err) { + object_property_del(container_get(object_get_root(), "/objects"), + id, &error_abort); + goto out; + } out: if (local_err) { error_propagate(errp, local_err); @@ -623,3 +630,32 @@ void 
qmp_object_del(const char *id, Error **errp) } object_unparent(obj); } + +MemoryDeviceInfoList *qmp_query_memory_devices(Error **errp) +{ + MemoryDeviceInfoList *head = NULL; + MemoryDeviceInfoList **prev = &head; + + qmp_pc_dimm_device_list(qdev_get_machine(), &prev); + + return head; +} + +ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp) +{ + bool ambig; + ACPIOSTInfoList *head = NULL; + ACPIOSTInfoList **prev = &head; + Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, &ambig); + + if (obj) { + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj); + AcpiDeviceIf *adev = ACPI_DEVICE_IF(obj); + + adevc->ospm_status(adev, &prev); + } else { + error_setg(errp, "command is not supported, missing ACPI device"); + } + + return head; +} diff --git a/qom/object.c b/qom/object.c index e42b254303..3876618c2e 100644 --- a/qom/object.c +++ b/qom/object.c @@ -13,6 +13,7 @@ #include "qom/object.h" #include "qemu-common.h" #include "qapi/visitor.h" +#include "qapi-visit.h" #include "qapi/string-input-visitor.h" #include "qapi/string-output-visitor.h" #include "qapi/qmp/qerror.h" @@ -938,6 +939,40 @@ int64_t object_property_get_int(Object *obj, const char *name, return retval; } +int object_property_get_enum(Object *obj, const char *name, + const char *strings[], Error **errp) +{ + StringOutputVisitor *sov; + StringInputVisitor *siv; + int ret; + + sov = string_output_visitor_new(false); + object_property_get(obj, string_output_get_visitor(sov), name, errp); + siv = string_input_visitor_new(string_output_get_string(sov)); + string_output_visitor_cleanup(sov); + visit_type_enum(string_input_get_visitor(siv), + &ret, strings, NULL, name, errp); + string_input_visitor_cleanup(siv); + + return ret; +} + +void object_property_get_uint16List(Object *obj, const char *name, + uint16List **list, Error **errp) +{ + StringOutputVisitor *ov; + StringInputVisitor *iv; + + ov = string_output_visitor_new(false); + object_property_get(obj, 
string_output_get_visitor(ov), + name, errp); + iv = string_input_visitor_new(string_output_get_string(ov)); + visit_type_uint16List(string_input_get_visitor(iv), + list, NULL, errp); + string_output_visitor_cleanup(ov); + string_input_visitor_cleanup(iv); +} + void object_property_parse(Object *obj, const char *string, const char *name, Error **errp) { @@ -42,7 +42,6 @@ #include "block/snapshot.h" #include "block/qapi.h" -#define SELF_ANNOUNCE_ROUNDS 5 #ifndef ETH_P_RARP #define ETH_P_RARP 0x8035 @@ -98,7 +97,7 @@ static void qemu_announce_self_once(void *opaque) if (--count) { /* delay 50ms, 150ms, 250ms, ... */ timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + - 50 + (SELF_ANNOUNCE_ROUNDS - count - 1) * 100); + self_announce_delay(count)); } else { timer_del(timer); timer_free(timer); @@ -1209,7 +1208,7 @@ void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev) void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev) { - /* Nothing do to while the implementation is in RAMBlock */ + qemu_ram_unset_idstr(memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK); } void vmstate_register_ram_global(MemoryRegion *mr) diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index d99e2b9259..997d68d5b9 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -1,4 +1,6 @@ stub-obj-y += arch-query-cpu-def.o +stub-obj-y += bdrv-commit-all.o +stub-obj-y += chr-msmouse.o stub-obj-y += clock-warp.o stub-obj-y += cpu-get-clock.o stub-obj-y += cpu-get-icount.o @@ -9,13 +11,18 @@ stub-obj-y += fdset-get-fd.o stub-obj-y += fdset-remove-fd.o stub-obj-y += gdbstub.o stub-obj-y += get-fd.o +stub-obj-y += get-next-serial.o stub-obj-y += get-vm-name.o stub-obj-y += iothread-lock.o +stub-obj-y += is-daemonized.o +stub-obj-y += machine-init-done.o stub-obj-y += migr-blocker.o stub-obj-y += mon-is-qmp.o stub-obj-y += mon-printf.o stub-obj-y += mon-protocol-event.o stub-obj-y += mon-set-error.o +stub-obj-y += monitor-init.o +stub-obj-y += notify-event.o stub-obj-y += 
pci-drive-hot-add.o stub-obj-y += qtest.o stub-obj-y += reset.o @@ -24,8 +31,10 @@ stub-obj-y += set-fd-handler.o stub-obj-y += slirp.o stub-obj-y += sysbus.o stub-obj-y += uuid.o +stub-obj-y += vc-init.o stub-obj-y += vm-stop.o stub-obj-y += vmstate.o stub-obj-$(CONFIG_WIN32) += fd-register.o stub-obj-y += cpus.o stub-obj-y += kvm.o +stub-obj-y += qmp_pc_dimm_device_list.o diff --git a/stubs/bdrv-commit-all.c b/stubs/bdrv-commit-all.c new file mode 100644 index 0000000000..a8e0a95417 --- /dev/null +++ b/stubs/bdrv-commit-all.c @@ -0,0 +1,7 @@ +#include "qemu-common.h" +#include "block/block.h" + +int bdrv_commit_all(void) +{ + return 0; +} diff --git a/stubs/chr-msmouse.c b/stubs/chr-msmouse.c new file mode 100644 index 0000000000..812f8b0abe --- /dev/null +++ b/stubs/chr-msmouse.c @@ -0,0 +1,7 @@ +#include "qemu-common.h" +#include "sysemu/char.h" + +CharDriverState *qemu_chr_open_msmouse(void) +{ + return 0; +} diff --git a/stubs/get-next-serial.c b/stubs/get-next-serial.c new file mode 100644 index 0000000000..40c56d13d7 --- /dev/null +++ b/stubs/get-next-serial.c @@ -0,0 +1,3 @@ +#include "qemu-common.h" + +CharDriverState *serial_hds[0]; diff --git a/stubs/is-daemonized.c b/stubs/is-daemonized.c new file mode 100644 index 0000000000..c0ee9171a7 --- /dev/null +++ b/stubs/is-daemonized.c @@ -0,0 +1,9 @@ +#include "qemu-common.h" + +/* Win32 has its own inline stub */ +#ifndef _WIN32 +bool is_daemonized(void) +{ + return false; +} +#endif diff --git a/stubs/machine-init-done.c b/stubs/machine-init-done.c new file mode 100644 index 0000000000..28a92555b6 --- /dev/null +++ b/stubs/machine-init-done.c @@ -0,0 +1,6 @@ +#include "qemu-common.h" +#include "sysemu/sysemu.h" + +void qemu_add_machine_init_done_notifier(Notifier *notify) +{ +} diff --git a/stubs/monitor-init.c b/stubs/monitor-init.c new file mode 100644 index 0000000000..563902b412 --- /dev/null +++ b/stubs/monitor-init.c @@ -0,0 +1,6 @@ +#include "qemu-common.h" +#include "monitor/monitor.h" + +void 
monitor_init(CharDriverState *chr, int flags) +{ +} diff --git a/stubs/notify-event.c b/stubs/notify-event.c new file mode 100644 index 0000000000..32f7289d3a --- /dev/null +++ b/stubs/notify-event.c @@ -0,0 +1,6 @@ +#include "qemu-common.h" +#include "qemu/main-loop.h" + +void qemu_notify_event(void) +{ +} diff --git a/stubs/qmp_pc_dimm_device_list.c b/stubs/qmp_pc_dimm_device_list.c new file mode 100644 index 0000000000..5cb220c66c --- /dev/null +++ b/stubs/qmp_pc_dimm_device_list.c @@ -0,0 +1,7 @@ +#include "qom/object.h" +#include "hw/mem/pc-dimm.h" + +int qmp_pc_dimm_device_list(Object *obj, void *opaque) +{ + return 0; +} diff --git a/stubs/vc-init.c b/stubs/vc-init.c new file mode 100644 index 0000000000..2af054fe6b --- /dev/null +++ b/stubs/vc-init.c @@ -0,0 +1,7 @@ +#include "qemu-common.h" +#include "ui/console.h" + +CharDriverState *vc_init(ChardevVC *vc) +{ + return 0; +} diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h index edc7f262fc..eaee9447ee 100644 --- a/target-arm/cpu-qom.h +++ b/target-arm/cpu-qom.h @@ -94,6 +94,12 @@ typedef struct ARMCPU { /* 'compatible' string for this CPU for Linux device trees */ const char *dtb_compatible; + /* PSCI version for this CPU + * Bits[31:16] = Major Version + * Bits[15:0] = Minor Version + */ + uint32_t psci_version; + /* Should CPU start in PSCI powered-off state? */ bool start_powered_off; @@ -102,6 +108,9 @@ typedef struct ARMCPU { */ uint32_t kvm_target; + /* KVM init features for this CPU */ + uint32_t kvm_init_features[7]; + /* The instance init functions for implementation-specific subclasses * set these fields to specify the implementation-dependent values of * various constant registers and reset values of non-constant diff --git a/target-arm/cpu.c b/target-arm/cpu.c index b8778350f7..05e52e0e83 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -260,6 +260,7 @@ static void arm_cpu_initfn(Object *obj) * picky DTB consumer will also provide a helpful error message. 
*/ cpu->dtb_compatible = "qemu,unknown"; + cpu->psci_version = 1; /* By default assume PSCI v0.1 */ cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; if (tcg_enabled() && !inited) { diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 79e7f82515..369d4727ae 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -430,6 +430,22 @@ int arm_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int rw, /* Execution state bits. MRS read as zero, MSR writes ignored. */ #define CPSR_EXEC (CPSR_T | CPSR_IT | CPSR_J) +#define TTBCR_N (7U << 0) /* TTBCR.EAE==0 */ +#define TTBCR_T0SZ (7U << 0) /* TTBCR.EAE==1 */ +#define TTBCR_PD0 (1U << 4) +#define TTBCR_PD1 (1U << 5) +#define TTBCR_EPD0 (1U << 7) +#define TTBCR_IRGN0 (3U << 8) +#define TTBCR_ORGN0 (3U << 10) +#define TTBCR_SH0 (3U << 12) +#define TTBCR_T1SZ (3U << 16) +#define TTBCR_A1 (1U << 22) +#define TTBCR_EPD1 (1U << 23) +#define TTBCR_IRGN1 (3U << 24) +#define TTBCR_ORGN1 (3U << 26) +#define TTBCR_SH1 (1U << 28) +#define TTBCR_EAE (1U << 31) + /* Bit definitions for ARMv8 SPSR (PSTATE) format. * Only these are valid when in AArch64 mode; in * AArch32 mode SPSRs are basically CPSR-format. 
diff --git a/target-arm/helper.c b/target-arm/helper.c index 050c40981b..ed4d2bb419 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -312,7 +312,7 @@ static inline bool extended_addresses_enabled(CPUARMState *env) { return arm_el_is_aa64(env, 1) || ((arm_feature(env, ARM_FEATURE_LPAE) - && (env->cp15.c2_control & (1U << 31)))); + && (env->cp15.c2_control & TTBCR_EAE))); } static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) @@ -1413,11 +1413,22 @@ static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri, { int maskshift = extract32(value, 0, 3); - if (arm_feature(env, ARM_FEATURE_LPAE) && (value & (1 << 31))) { - value &= ~((7 << 19) | (3 << 14) | (0xf << 3)); - } else { - value &= 7; + if (!arm_feature(env, ARM_FEATURE_V8)) { + if (arm_feature(env, ARM_FEATURE_LPAE) && (value & TTBCR_EAE)) { + /* Pre ARMv8 bits [21:19], [15:14] and [6:3] are UNK/SBZP when + * using Long-descriptor translation table format */ + value &= ~((7 << 19) | (3 << 14) | (0xf << 3)); + } else if (arm_feature(env, ARM_FEATURE_EL3)) { + /* In an implementation that includes the Security Extensions + * TTBCR has additional fields PD0 [4] and PD1 [5] for + * Short-descriptor translation table format. 
+ */ + value &= TTBCR_PD1 | TTBCR_PD0 | TTBCR_N; + } else { + value &= TTBCR_N; + } } + /* Note that we always calculate c2_mask and c2_base_mask, but * they are only used for short-descriptor tables (ie if EAE is 0); * for long-descriptor tables the TTBCR fields are used differently @@ -3540,17 +3551,24 @@ static inline int check_ap(CPUARMState *env, int ap, int domain_prot, } } -static uint32_t get_level1_table_address(CPUARMState *env, uint32_t address) +static bool get_level1_table_address(CPUARMState *env, uint32_t *table, + uint32_t address) { - uint32_t table; - - if (address & env->cp15.c2_mask) - table = env->cp15.ttbr1_el1 & 0xffffc000; - else - table = env->cp15.ttbr0_el1 & env->cp15.c2_base_mask; - - table |= (address >> 18) & 0x3ffc; - return table; + if (address & env->cp15.c2_mask) { + if ((env->cp15.c2_control & TTBCR_PD1)) { + /* Translation table walk disabled for TTBR1 */ + return false; + } + *table = env->cp15.ttbr1_el1 & 0xffffc000; + } else { + if ((env->cp15.c2_control & TTBCR_PD0)) { + /* Translation table walk disabled for TTBR0 */ + return false; + } + *table = env->cp15.ttbr0_el1 & env->cp15.c2_base_mask; + } + *table |= (address >> 18) & 0x3ffc; + return true; } static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, @@ -3563,13 +3581,17 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, uint32_t desc; int type; int ap; - int domain; + int domain = 0; int domain_prot; hwaddr phys_addr; /* Pagetable walk. */ /* Lookup l1 descriptor. */ - table = get_level1_table_address(env, address); + if (!get_level1_table_address(env, &table, address)) { + /* Section translation fault if page walk is disabled by PD0 or PD1 */ + code = 5; + goto do_fault; + } desc = ldl_phys(cs->as, table); type = (desc & 3); domain = (desc >> 5) & 0x0f; @@ -3667,7 +3689,11 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type, /* Pagetable walk. */ /* Lookup l1 descriptor. 
*/ - table = get_level1_table_address(env, address); + if (!get_level1_table_address(env, &table, address)) { + /* Section translation fault if page walk is disabled by PD0 or PD1 */ + code = 5; + goto do_fault; + } desc = ldl_phys(cs->as, table); type = (desc & 3); if (type == 0 || (type == 3 && !arm_feature(env, ARM_FEATURE_PXN))) { @@ -3926,7 +3952,7 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address, * These are basically the same thing, although the number * of bits we pull in from the vaddr varies. */ - page_size = (1 << ((granule_sz * (4 - level)) + 3)); + page_size = (1ULL << ((granule_sz * (4 - level)) + 3)); descaddr |= (address & (page_size - 1)); /* Extract attributes from the descriptor and merge with table attrs */ attrs = extract64(descriptor, 2, 10) diff --git a/target-arm/kvm.c b/target-arm/kvm.c index 39202d7eea..319784d689 100644 --- a/target-arm/kvm.c +++ b/target-arm/kvm.c @@ -27,6 +27,17 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_LAST_INFO }; +int kvm_arm_vcpu_init(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + struct kvm_vcpu_init init; + + init.target = cpu->kvm_target; + memcpy(init.features, cpu->kvm_init_features, sizeof(init.features)); + + return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init); +} + bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, int *fdarray, struct kvm_vcpu_init *init) diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c index b79750c57e..068af7db57 100644 --- a/target-arm/kvm32.c +++ b/target-arm/kvm32.c @@ -166,7 +166,6 @@ static int compare_u64(const void *a, const void *b) int kvm_arch_init_vcpu(CPUState *cs) { - struct kvm_vcpu_init init; int i, ret, arraylen; uint64_t v; struct kvm_one_reg r; @@ -179,15 +178,22 @@ int kvm_arch_init_vcpu(CPUState *cs) return -EINVAL; } - init.target = cpu->kvm_target; - memset(init.features, 0, sizeof(init.features)); + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, 
sizeof(cpu->kvm_init_features)); if (cpu->start_powered_off) { - init.features[0] = 1 << KVM_ARM_VCPU_POWER_OFF; + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; + } + if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) { + cpu->psci_version = 2; + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2; } - ret = kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init); + + /* Do KVM_ARM_VCPU_INIT ioctl */ + ret = kvm_arm_vcpu_init(cs); if (ret) { return ret; } + /* Query the kernel to make sure it supports 32 VFP * registers: QEMU's "cortex-a15" CPU is always a * VFP-D32 core. The simplest way to do this is just diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c index 70f311bed6..5d217ca2ad 100644 --- a/target-arm/kvm64.c +++ b/target-arm/kvm64.c @@ -77,9 +77,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc) int kvm_arch_init_vcpu(CPUState *cs) { - ARMCPU *cpu = ARM_CPU(cs); - struct kvm_vcpu_init init; int ret; + ARMCPU *cpu = ARM_CPU(cs); if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE || !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { @@ -87,16 +86,25 @@ int kvm_arch_init_vcpu(CPUState *cs) return -EINVAL; } - init.target = cpu->kvm_target; - memset(init.features, 0, sizeof(init.features)); + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); if (cpu->start_powered_off) { - init.features[0] = 1 << KVM_ARM_VCPU_POWER_OFF; + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; + } + if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) { + cpu->psci_version = 2; + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2; + } + + /* Do KVM_ARM_VCPU_INIT ioctl */ + ret = kvm_arm_vcpu_init(cs); + if (ret) { + return ret; } - ret = kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init); /* TODO : support for save/restore/reset of system regs via tuple list */ - return ret; + return 0; } #define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ @@ -262,4 +270,8 @@ int 
kvm_arch_get_registers(CPUState *cs) void kvm_arm_reset_vcpu(ARMCPU *cpu) { + /* Re-init VCPU so that all registers are set to + * their respective reset values. + */ + kvm_arm_vcpu_init(CPU(cpu)); } diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h index dc4e2336fa..af93105517 100644 --- a/target-arm/kvm_arm.h +++ b/target-arm/kvm_arm.h @@ -15,6 +15,18 @@ #include "exec/memory.h" /** + * kvm_arm_vcpu_init: + * @cs: CPUState + * + * Initialize (or reinitialize) the VCPU by invoking the + * KVM_ARM_VCPU_INIT ioctl with the CPU type and feature + * bitmask specified in the CPUState. + * + * Returns: 0 if success else < 0 error code + */ +int kvm_arm_vcpu_init(CPUState *cs); + +/** * kvm_arm_register_device: * @mr: memory region for this device * @devid: the KVM device ID diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 63ad787e9f..33b5025fee 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -6539,7 +6539,7 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, tcg_shift = tcg_const_i32(fracbits); if (is_double) { - int maxpass = is_scalar ? 1 : is_q ? 2 : 1; + int maxpass = is_scalar ? 1 : 2; for (pass = 0; pass < maxpass; pass++) { TCGv_i64 tcg_op = tcg_temp_new_i64(); @@ -9052,7 +9052,8 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) } if (size == 3) { - for (pass = 0; pass < (is_q ? 
2 : 1); pass++) { + assert(is_q); + for (pass = 0; pass < 2; pass++) { TCGv_i64 tcg_op1 = tcg_temp_new_i64(); TCGv_i64 tcg_op2 = tcg_temp_new_i64(); TCGv_i64 tcg_res = tcg_temp_new_i64(); diff --git a/target-mips/cpu.h b/target-mips/cpu.h index a9b2c7ae38..8b9a92ebdc 100644 --- a/target-mips/cpu.h +++ b/target-mips/cpu.h @@ -168,6 +168,7 @@ struct TCState { target_ulong CP0_TCSchedule; target_ulong CP0_TCScheFBack; int32_t CP0_Debug_tcstatus; + target_ulong CP0_UserLocal; }; typedef struct CPUMIPSState CPUMIPSState; @@ -362,6 +363,7 @@ struct CPUMIPSState { int32_t CP0_Config3; #define CP0C3_M 31 #define CP0C3_ISA_ON_EXC 16 +#define CP0C3_ULRI 13 #define CP0C3_DSPP 10 #define CP0C3_LPA 7 #define CP0C3_VEIC 6 @@ -470,6 +472,8 @@ struct CPUMIPSState { /* MIPS DSP resources access. */ #define MIPS_HFLAG_DSP 0x40000 /* Enable access to MIPS DSP resources. */ #define MIPS_HFLAG_DSPR2 0x80000 /* Enable access to MIPS DSPR2 resources. */ + /* Extra flag about HWREna register. */ +#define MIPS_HFLAG_HWRENA_ULR 0x100000 /* ULR bit from HWREna is set. */ target_ulong btarget; /* Jump / branch target */ target_ulong bcond; /* Branch condition (if needed) */ @@ -479,8 +483,6 @@ struct CPUMIPSState { uint32_t CP0_TCStatus_rw_bitmask; /* Read/write bits in CP0_TCStatus */ int insn_flags; /* Supported instruction set */ - target_ulong tls_value; /* For usermode emulation */ - CPU_COMMON /* Fields from here on are preserved across CPU reset. */ @@ -523,7 +525,7 @@ void mips_cpu_list (FILE *f, fprintf_function cpu_fprintf); extern void cpu_wrdsp(uint32_t rs, uint32_t mask_num, CPUMIPSState *env); extern uint32_t cpu_rddsp(uint32_t mask_num, CPUMIPSState *env); -#define CPU_SAVE_VERSION 3 +#define CPU_SAVE_VERSION 4 /* MMU modes definitions. We carefully match the indices with our hflags layout. 
*/ @@ -682,7 +684,8 @@ static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, target_ulong *pc, { *pc = env->active_tc.PC; *cs_base = 0; - *flags = env->hflags & (MIPS_HFLAG_TMASK | MIPS_HFLAG_BMASK); + *flags = env->hflags & (MIPS_HFLAG_TMASK | MIPS_HFLAG_BMASK | + MIPS_HFLAG_HWRENA_ULR); } static inline int mips_vpe_active(CPUMIPSState *env) diff --git a/target-mips/machine.c b/target-mips/machine.c index 0a07db8540..0496faa910 100644 --- a/target-mips/machine.c +++ b/target-mips/machine.c @@ -25,6 +25,7 @@ static void save_tc(QEMUFile *f, TCState *tc) qemu_put_betls(f, &tc->CP0_TCSchedule); qemu_put_betls(f, &tc->CP0_TCScheFBack); qemu_put_sbe32s(f, &tc->CP0_Debug_tcstatus); + qemu_put_betls(f, &tc->CP0_UserLocal); } static void save_fpu(QEMUFile *f, CPUMIPSFPUContext *fpu) @@ -151,7 +152,7 @@ void cpu_save(QEMUFile *f, void *opaque) save_fpu(f, &env->fpus[i]); } -static void load_tc(QEMUFile *f, TCState *tc) +static void load_tc(QEMUFile *f, TCState *tc, int version_id) { int i; @@ -173,6 +174,9 @@ static void load_tc(QEMUFile *f, TCState *tc) qemu_get_betls(f, &tc->CP0_TCSchedule); qemu_get_betls(f, &tc->CP0_TCScheFBack); qemu_get_sbe32s(f, &tc->CP0_Debug_tcstatus); + if (version_id >= 4) { + qemu_get_betls(f, &tc->CP0_UserLocal); + } } static void load_fpu(QEMUFile *f, CPUMIPSFPUContext *fpu) @@ -194,11 +198,12 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) MIPSCPU *cpu = mips_env_get_cpu(env); int i; - if (version_id != 3) + if (version_id < 3) { return -EINVAL; + } /* Load active TC */ - load_tc(f, &env->active_tc); + load_tc(f, &env->active_tc, version_id); /* Load active FPU */ load_fpu(f, &env->active_fpu); @@ -298,8 +303,9 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) qemu_get_sbe32s(f, &env->CP0_DESAVE); /* Load inactive TC state */ - for (i = 0; i < MIPS_SHADOW_SET_MAX; i++) - load_tc(f, &env->tcs[i]); + for (i = 0; i < MIPS_SHADOW_SET_MAX; i++) { + load_tc(f, &env->tcs[i], version_id); + } for (i = 0; i < MIPS_FPU_MAX; 
i++) load_fpu(f, &env->fpus[i]); diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c index 4704216834..27651a4a00 100644 --- a/target-mips/op_helper.c +++ b/target-mips/op_helper.c @@ -1297,7 +1297,19 @@ void helper_mtc0_srsconf4(CPUMIPSState *env, target_ulong arg1) void helper_mtc0_hwrena(CPUMIPSState *env, target_ulong arg1) { - env->CP0_HWREna = arg1 & 0x0000000F; + uint32_t mask = 0x0000000F; + + if (env->CP0_Config3 & (1 << CP0C3_ULRI)) { + mask |= (1 << 29); + + if (arg1 & (1 << 29)) { + env->hflags |= MIPS_HFLAG_HWRENA_ULR; + } else { + env->hflags &= ~MIPS_HFLAG_HWRENA_ULR; + } + } + + env->CP0_HWREna = arg1 & mask; } void helper_mtc0_count(CPUMIPSState *env, target_ulong arg1) diff --git a/target-mips/translate.c b/target-mips/translate.c index d95ab9efe7..1b2bf7f790 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -1072,6 +1072,7 @@ typedef struct DisasContext { uint32_t hflags, saved_hflags; int bstate; target_ulong btarget; + bool ulri; } DisasContext; enum { @@ -4215,7 +4216,18 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel) case 1: // gen_helper_mfc0_contextconfig(arg); /* SmartMIPS ASE */ rn = "ContextConfig"; + goto die; // break; + case 2: + if (ctx->ulri) { + tcg_gen_ld32s_tl(arg, cpu_env, + offsetof(CPUMIPSState, + active_tc.CP0_UserLocal)); + rn = "UserLocal"; + } else { + tcg_gen_movi_tl(arg, 0); + } + break; default: goto die; } @@ -4802,7 +4814,15 @@ static void gen_mtc0(DisasContext *ctx, TCGv arg, int reg, int sel) case 1: // gen_helper_mtc0_contextconfig(cpu_env, arg); /* SmartMIPS ASE */ rn = "ContextConfig"; + goto die; // break; + case 2: + if (ctx->ulri) { + tcg_gen_st_tl(arg, cpu_env, + offsetof(CPUMIPSState, active_tc.CP0_UserLocal)); + rn = "UserLocal"; + } + break; default: goto die; } @@ -4862,6 +4882,7 @@ static void gen_mtc0(DisasContext *ctx, TCGv arg, int reg, int sel) case 0: check_insn(ctx, ISA_MIPS32R2); gen_helper_mtc0_hwrena(cpu_env, arg); + ctx->bstate = BS_STOP; rn 
= "HWREna"; break; default: @@ -5406,7 +5427,17 @@ static void gen_dmfc0(DisasContext *ctx, TCGv arg, int reg, int sel) case 1: // gen_helper_dmfc0_contextconfig(arg); /* SmartMIPS ASE */ rn = "ContextConfig"; + goto die; // break; + case 2: + if (ctx->ulri) { + tcg_gen_ld_tl(arg, cpu_env, + offsetof(CPUMIPSState, active_tc.CP0_UserLocal)); + rn = "UserLocal"; + } else { + tcg_gen_movi_tl(arg, 0); + } + break; default: goto die; } @@ -5978,7 +6009,15 @@ static void gen_dmtc0(DisasContext *ctx, TCGv arg, int reg, int sel) case 1: // gen_helper_mtc0_contextconfig(cpu_env, arg); /* SmartMIPS ASE */ rn = "ContextConfig"; + goto die; // break; + case 2: + if (ctx->ulri) { + tcg_gen_st_tl(arg, cpu_env, + offsetof(CPUMIPSState, active_tc.CP0_UserLocal)); + rn = "UserLocal"; + } + break; default: goto die; } @@ -6038,6 +6077,7 @@ static void gen_dmtc0(DisasContext *ctx, TCGv arg, int reg, int sel) case 0: check_insn(ctx, ISA_MIPS32R2); gen_helper_mtc0_hwrena(cpu_env, arg); + ctx->bstate = BS_STOP; rn = "HWREna"; break; default: @@ -9060,12 +9100,20 @@ static void gen_rdhwr(DisasContext *ctx, int rt, int rd) break; case 29: #if defined(CONFIG_USER_ONLY) - tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUMIPSState, tls_value)); + tcg_gen_ld_tl(t0, cpu_env, + offsetof(CPUMIPSState, active_tc.CP0_UserLocal)); gen_store_gpr(t0, rt); break; #else - /* XXX: Some CPUs implement this in hardware. - Not supported yet. */ + if ((ctx->hflags & MIPS_HFLAG_CP0) || + (ctx->hflags & MIPS_HFLAG_HWRENA_ULR)) { + tcg_gen_ld_tl(t0, cpu_env, + offsetof(CPUMIPSState, active_tc.CP0_UserLocal)); + gen_store_gpr(t0, rt); + } else { + generate_exception(ctx, EXCP_RI); + } + break; #endif default: /* Invalid */ MIPS_INVAL("rdhwr"); @@ -15609,6 +15657,7 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb, ctx.bstate = BS_NONE; /* Restore delay slot state from the tb context. */ ctx.hflags = (uint32_t)tb->flags; /* FIXME: maybe use 64 bits here? 
*/ + ctx.ulri = env->CP0_Config3 & (1 << CP0C3_ULRI); restore_cpu_state(env, &ctx); #ifdef CONFIG_USER_ONLY ctx.mem_idx = MIPS_HFLAG_UM; diff --git a/tcg/optimize.c b/tcg/optimize.c index 16cebbe16d..34ae3c2857 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -911,12 +911,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; } - /* 32-bit ops (non 64-bit ops and non load/store ops) generate - 32-bit results. For the result is zero test below, we can - ignore high bits, but for further optimizations we need to - record that the high bits contain garbage. */ + /* 32-bit ops generate 32-bit results. For the result is zero test + below, we can ignore high bits, but for further optimizations we + need to record that the high bits contain garbage. */ partmask = mask; - if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) { + if (!(def->flags & TCG_OPF_64BIT)) { mask |= ~(tcg_target_ulong)0xffffffffu; partmask &= 0xffffffffu; affected &= 0xffffffffu; diff --git a/tests/Makefile b/tests/Makefile index 361bb7b6e3..4caf7deb89 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -156,6 +156,7 @@ gcov-files-i386-y += hw/usb/hcd-ehci.c gcov-files-i386-y += hw/usb/hcd-uhci.c gcov-files-i386-y += hw/usb/dev-hid.c gcov-files-i386-y += hw/usb/dev-storage.c +#check-qtest-i386-y += tests/vhost-user-test$(EXESUF) check-qtest-x86_64-y = $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) @@ -322,9 +323,12 @@ tests/es1370-test$(EXESUF): tests/es1370-test.o tests/intel-hda-test$(EXESUF): tests/intel-hda-test.o tests/ioh3420-test$(EXESUF): tests/ioh3420-test.o tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-pc-obj-y) +tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o libqemuutil.a libqemustub.a tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o 
tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o libqemuutil.a libqemustub.a +#LIBS+= -lutil + # QTest rules TARGETS=$(patsubst %-softmmu,%, $(filter %-softmmu,$(TARGET_DIRS))) diff --git a/tests/acpi-test-data/pc/DSDT b/tests/acpi-test-data/pc/DSDT Binary files differindex d0bb3de79d..7ed03fd37e 100644 --- a/tests/acpi-test-data/pc/DSDT +++ b/tests/acpi-test-data/pc/DSDT diff --git a/tests/acpi-test-data/pc/SSDT b/tests/acpi-test-data/pc/SSDT Binary files differindex c987fb2379..eb2d8b698c 100644 --- a/tests/acpi-test-data/pc/SSDT +++ b/tests/acpi-test-data/pc/SSDT diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT Binary files differindex fc5b970009..2d2bc4adaf 100644 --- a/tests/acpi-test-data/q35/DSDT +++ b/tests/acpi-test-data/q35/DSDT diff --git a/tests/acpi-test-data/q35/SSDT b/tests/acpi-test-data/q35/SSDT Binary files differindex 9199638757..778b79bf42 100644 --- a/tests/acpi-test-data/q35/SSDT +++ b/tests/acpi-test-data/q35/SSDT diff --git a/tests/test-string-input-visitor.c b/tests/test-string-input-visitor.c index 877e737714..8e3433e0c7 100644 --- a/tests/test-string-input-visitor.c +++ b/tests/test-string-input-visitor.c @@ -64,6 +64,33 @@ static void test_visitor_in_int(TestInputVisitorData *data, g_assert_cmpint(res, ==, value); } +static void test_visitor_in_intList(TestInputVisitorData *data, + const void *unused) +{ + int64_t value[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 20}; + int16List *res = NULL, *tmp; + Visitor *v; + int i = 0; + + v = visitor_input_test_init(data, "1,2,0,2-4,20,5-9,1-8"); + + visit_type_int16List(v, &res, NULL, &error_abort); + tmp = res; + while (i < sizeof(value) / sizeof(value[0])) { + g_assert(tmp); + g_assert_cmpint(tmp->value, ==, value[i++]); + tmp = tmp->next; + } + g_assert(!tmp); + + tmp = res; + while (tmp) { + res = res->next; + g_free(tmp); + tmp = res; + } +} + static void test_visitor_in_bool(TestInputVisitorData *data, const void *unused) { @@ -170,6 +197,7 @@ static void 
test_visitor_in_fuzz(TestInputVisitorData *data, const void *unused) { int64_t ires; + intList *ilres; bool bres; double nres; char *sres; @@ -193,6 +221,11 @@ static void test_visitor_in_fuzz(TestInputVisitorData *data, v = visitor_input_test_init(data, buf); visit_type_int(v, &ires, NULL, NULL); + visitor_input_teardown(data, NULL); + + v = visitor_input_test_init(data, buf); + visit_type_intList(v, &ilres, NULL, NULL); + visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, buf); visit_type_bool(v, &bres, NULL, NULL); @@ -200,11 +233,13 @@ static void test_visitor_in_fuzz(TestInputVisitorData *data, v = visitor_input_test_init(data, buf); visit_type_number(v, &nres, NULL, NULL); + visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, buf); sres = NULL; visit_type_str(v, &sres, NULL, NULL); g_free(sres); + visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, buf); visit_type_EnumOne(v, &eres, NULL, NULL); @@ -228,6 +263,8 @@ int main(int argc, char **argv) input_visitor_test_add("/string-visitor/input/int", &in_visitor_data, test_visitor_in_int); + input_visitor_test_add("/string-visitor/input/intList", + &in_visitor_data, test_visitor_in_intList); input_visitor_test_add("/string-visitor/input/bool", &in_visitor_data, test_visitor_in_bool); input_visitor_test_add("/string-visitor/input/number", diff --git a/tests/test-string-output-visitor.c b/tests/test-string-output-visitor.c index 2af5a21ab5..28e7359a2a 100644 --- a/tests/test-string-output-visitor.c +++ b/tests/test-string-output-visitor.c @@ -44,7 +44,7 @@ static void visitor_output_teardown(TestOutputVisitorData *data, static void test_visitor_out_int(TestOutputVisitorData *data, const void *unused) { - int64_t value = -42; + int64_t value = 42; Error *err = NULL; char *str; @@ -53,10 +53,42 @@ static void test_visitor_out_int(TestOutputVisitorData *data, str = string_output_get_string(data->sov); g_assert(str != NULL); - g_assert_cmpstr(str, ==, "-42"); + 
g_assert_cmpstr(str, ==, "42"); g_free(str); } +static void test_visitor_out_intList(TestOutputVisitorData *data, + const void *unused) +{ + int64_t value[] = {0, 1, 9, 10, 16, 15, 14, + 3, 4, 5, 6, 11, 12, 13, 21, 22, INT64_MAX - 1, INT64_MAX}; + intList *list = NULL, **tmp = &list; + int i; + Error *errp = NULL; + char *str; + + for (i = 0; i < sizeof(value) / sizeof(value[0]); i++) { + *tmp = g_malloc0(sizeof(**tmp)); + (*tmp)->value = value[i]; + tmp = &(*tmp)->next; + } + + visit_type_intList(data->ov, &list, NULL, &errp); + g_assert(errp == NULL); + + str = string_output_get_string(data->sov); + g_assert(str != NULL); + g_assert_cmpstr(str, ==, + "0-1,3-6,9-16,21-22,9223372036854775806-9223372036854775807"); + g_free(str); + while (list) { + intList *tmp2; + tmp2 = list->next; + g_free(list); + list = tmp2; + } +} + static void test_visitor_out_bool(TestOutputVisitorData *data, const void *unused) { @@ -182,6 +214,8 @@ int main(int argc, char **argv) &out_visitor_data, test_visitor_out_enum); output_visitor_test_add("/string-visitor/output/enum-errors", &out_visitor_data, test_visitor_out_enum_errors); + output_visitor_test_add("/string-visitor/output/intList", + &out_visitor_data, test_visitor_out_intList); g_test_run(); diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c new file mode 100644 index 0000000000..7c826b49e5 --- /dev/null +++ b/tests/vhost-user-test.c @@ -0,0 +1,312 @@ +/* + * QTest testcase for the vhost-user + * + * Copyright (c) 2014 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "libqtest.h" +#include "qemu/option.h" +#include "sysemu/char.h" +#include "sysemu/sysemu.h" + +#include <glib.h> +#include <linux/vhost.h> +#include <sys/mman.h> +#include <sys/vfs.h> +#include <qemu/sockets.h> + +#define QEMU_CMD_ACCEL " -machine accel=tcg" +#define QEMU_CMD_MEM " -m 512 -object memory-backend-file,id=mem,size=512M,"\ + "mem-path=%s,share=on -numa node,memdev=mem" +#define QEMU_CMD_CHR " -chardev socket,id=chr0,path=%s" +#define QEMU_CMD_NETDEV " -netdev vhost-user,id=net0,chardev=chr0,vhostforce" +#define QEMU_CMD_NET " -device virtio-net-pci,netdev=net0 " +#define QEMU_CMD_ROM " -option-rom ../pc-bios/pxe-virtio.rom" + +#define QEMU_CMD QEMU_CMD_ACCEL QEMU_CMD_MEM QEMU_CMD_CHR \ + QEMU_CMD_NETDEV QEMU_CMD_NET QEMU_CMD_ROM + +#define HUGETLBFS_MAGIC 0x958458f6 + +/*********** FROM hw/virtio/vhost-user.c *************************************/ + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_MAX +} VhostUserRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserMsg { + VhostUserRequest request; + +#define VHOST_USER_VERSION_MASK (0x3) +#define VHOST_USER_REPLY_MASK (0x1<<2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { 
+ uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + }; +} QEMU_PACKED VhostUserMsg; + +static VhostUserMsg m __attribute__ ((unused)); +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \ + + sizeof(m.flags) \ + + sizeof(m.size)) + +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION (0x1) +/*****************************************************************************/ + +int fds_num = 0, fds[VHOST_MEMORY_MAX_NREGIONS]; +static VhostUserMemory memory; +static GMutex data_mutex; +static GCond data_cond; + +static void read_guest_mem(void) +{ + uint32_t *guest_mem; + gint64 end_time; + int i, j; + + g_mutex_lock(&data_mutex); + + end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; + while (!fds_num) { + if (!g_cond_wait_until(&data_cond, &data_mutex, end_time)) { + /* timeout has passed */ + g_assert(fds_num); + break; + } + } + + /* check for sanity */ + g_assert_cmpint(fds_num, >, 0); + g_assert_cmpint(fds_num, ==, memory.nregions); + + /* iterate all regions */ + for (i = 0; i < fds_num; i++) { + + /* We'll check only the region statring at 0x0*/ + if (memory.regions[i].guest_phys_addr != 0x0) { + continue; + } + + g_assert_cmpint(memory.regions[i].memory_size, >, 1024); + + guest_mem = mmap(0, memory.regions[i].memory_size, + PROT_READ | PROT_WRITE, MAP_SHARED, fds[i], 0); + + for (j = 0; j < 256; j++) { + uint32_t a = readl(memory.regions[i].guest_phys_addr + j*4); + uint32_t b = guest_mem[j]; + + g_assert_cmpint(a, ==, b); + } + + munmap(guest_mem, memory.regions[i].memory_size); + } + + g_assert_cmpint(1, ==, 1); + g_mutex_unlock(&data_mutex); +} + +static void *thread_function(void *data) +{ + GMainLoop *loop; + loop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(loop); + return NULL; +} + +static int chr_can_read(void *opaque) +{ + return VHOST_USER_HDR_SIZE; +} + +static void chr_read(void *opaque, const 
uint8_t *buf, int size) +{ + CharDriverState *chr = opaque; + VhostUserMsg msg; + uint8_t *p = (uint8_t *) &msg; + int fd; + + if (size != VHOST_USER_HDR_SIZE) { + g_test_message("Wrong message size received %d\n", size); + return; + } + + memcpy(p, buf, VHOST_USER_HDR_SIZE); + + if (msg.size) { + p += VHOST_USER_HDR_SIZE; + qemu_chr_fe_read_all(chr, p, msg.size); + } + + switch (msg.request) { + case VHOST_USER_GET_FEATURES: + /* send back features to qemu */ + msg.flags |= VHOST_USER_REPLY_MASK; + msg.size = sizeof(m.u64); + msg.u64 = 0; + p = (uint8_t *) &msg; + qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); + break; + + case VHOST_USER_GET_VRING_BASE: + /* send back vring base to qemu */ + msg.flags |= VHOST_USER_REPLY_MASK; + msg.size = sizeof(m.state); + msg.state.num = 0; + p = (uint8_t *) &msg; + qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); + break; + + case VHOST_USER_SET_MEM_TABLE: + /* received the mem table */ + memcpy(&memory, &msg.memory, sizeof(msg.memory)); + fds_num = qemu_chr_fe_get_msgfds(chr, fds, sizeof(fds) / sizeof(int)); + + /* signal the test that it can continue */ + g_cond_signal(&data_cond); + g_mutex_unlock(&data_mutex); + break; + + case VHOST_USER_SET_VRING_KICK: + case VHOST_USER_SET_VRING_CALL: + /* consume the fd */ + qemu_chr_fe_get_msgfds(chr, &fd, 1); + /* + * This is a non-blocking eventfd. + * The receive function forces it to be blocking, + * so revert it back to non-blocking. 
+ */ + qemu_set_nonblock(fd); + break; + default: + break; + } +} + +static const char *init_hugepagefs(void) +{ + const char *path; + struct statfs fs; + int ret; + + path = getenv("QTEST_HUGETLBFS_PATH"); + if (!path) { + path = "/hugetlbfs"; + } + + if (access(path, R_OK | W_OK | X_OK)) { + g_test_message("access on path (%s): %s\n", path, strerror(errno)); + return NULL; + } + + do { + ret = statfs(path, &fs); + } while (ret != 0 && errno == EINTR); + + if (ret != 0) { + g_test_message("statfs on path (%s): %s\n", path, strerror(errno)); + return NULL; + } + + if (fs.f_type != HUGETLBFS_MAGIC) { + g_test_message("Warning: path not on HugeTLBFS: %s\n", path); + return NULL; + } + + return path; +} + +int main(int argc, char **argv) +{ + QTestState *s = NULL; + CharDriverState *chr = NULL; + const char *hugefs = 0; + char *socket_path = 0; + char *qemu_cmd = 0; + char *chr_path = 0; + int ret; + + g_test_init(&argc, &argv, NULL); + + module_call_init(MODULE_INIT_QOM); + + hugefs = init_hugepagefs(); + if (!hugefs) { + return 0; + } + + socket_path = g_strdup_printf("/tmp/vhost-%d.sock", getpid()); + + /* create char dev and add read handlers */ + qemu_add_opts(&qemu_chardev_opts); + chr_path = g_strdup_printf("unix:%s,server,nowait", socket_path); + chr = qemu_chr_new("chr0", chr_path, NULL); + g_free(chr_path); + qemu_chr_add_handlers(chr, chr_can_read, chr_read, NULL, chr); + + /* run the main loop thread so the chardev may operate */ + g_mutex_init(&data_mutex); + g_cond_init(&data_cond); + g_mutex_lock(&data_mutex); + g_thread_new(NULL, thread_function, NULL); + + qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path); + s = qtest_start(qemu_cmd); + g_free(qemu_cmd); + + qtest_add_func("/vhost-user/read-guest-mem", read_guest_mem); + + ret = g_test_run(); + + if (s) { + qtest_quit(s); + } + + /* cleanup */ + unlink(socket_path); + g_free(socket_path); + g_cond_clear(&data_cond); + g_mutex_clear(&data_mutex); + + return ret; +} diff --git a/trace-events 
b/trace-events index f8dff485b2..ba01ad52cf 100644 --- a/trace-events +++ b/trace-events @@ -1272,6 +1272,23 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x" pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x" +#hw/acpi/memory_hotplug.c +mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32 +mhp_acpi_read_addr_lo(uint32_t slot, uint32_t addr) "slot[0x%"PRIx32"] addr lo: 0x%"PRIx32 +mhp_acpi_read_addr_hi(uint32_t slot, uint32_t addr) "slot[0x%"PRIx32"] addr hi: 0x%"PRIx32 +mhp_acpi_read_size_lo(uint32_t slot, uint32_t size) "slot[0x%"PRIx32"] size lo: 0x%"PRIx32 +mhp_acpi_read_size_hi(uint32_t slot, uint32_t size) "slot[0x%"PRIx32"] size hi: 0x%"PRIx32 +mhp_acpi_read_pxm(uint32_t slot, uint32_t pxm) "slot[0x%"PRIx32"] proximity: 0x%"PRIx32 +mhp_acpi_read_flags(uint32_t slot, uint32_t flags) "slot[0x%"PRIx32"] flags: 0x%"PRIx32 +mhp_acpi_write_slot(uint32_t slot) "set active slot: 0x%"PRIx32 +mhp_acpi_write_ost_ev(uint32_t slot, uint32_t ev) "slot[0x%"PRIx32"] OST EVENT: 0x%"PRIx32 +mhp_acpi_write_ost_status(uint32_t slot, uint32_t st) "slot[0x%"PRIx32"] OST STATUS: 0x%"PRIx32 +mhp_acpi_clear_insert_evt(uint32_t slot) "slot[0x%"PRIx32"] clear insert event" + +#hw/i386/pc.c +mhp_pc_dimm_assigned_slot(int slot) "0x%d" +mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64 + # target-s390x/kvm.c kvm_enable_cmma(int rc) "CMMA: enabling with result code %d" kvm_clear_cmma(int rc) "CMMA: clearing with result code %d" diff --git a/translate-all.c b/translate-all.c index 6b7b46e761..5425d038d9 100644 --- a/translate-all.c +++ b/translate-all.c @@ -295,14 +295,7 @@ void page_size_init(void) { /* NOTE: we can always suppose that qemu_host_page_size >= TARGET_PAGE_SIZE */ -#ifdef _WIN32 - SYSTEM_INFO system_info; - - GetSystemInfo(&system_info); - 
qemu_real_host_page_size = system_info.dwPageSize; -#else qemu_real_host_page_size = getpagesize(); -#endif if (qemu_host_page_size == 0) { qemu_host_page_size = qemu_real_host_page_size; } diff --git a/ui/spice-display.c b/ui/spice-display.c index 03040b157f..66e25788ce 100644 --- a/ui/spice-display.c +++ b/ui/spice-display.c @@ -534,7 +534,7 @@ static void interface_release_resource(QXLInstance *sin, QXLCommandExt *ext; dprint(2, "%s/%d:\n", __func__, ssd->qxl.id); - ext = (void *)(rext.info->id); + ext = (void *)(intptr_t)(rext.info->id); switch (ext->cmd.type) { case QXL_CMD_DRAW: update = container_of(ext, SimpleSpiceUpdate, ext); diff --git a/ui/vnc-tls.c b/ui/vnc-tls.c index 50275de64f..63923265fd 100644 --- a/ui/vnc-tls.c +++ b/ui/vnc-tls.c @@ -443,10 +443,8 @@ static int vnc_set_x509_credential(VncDisplay *vd, { struct stat sb; - if (*cred) { - g_free(*cred); - *cred = NULL; - } + g_free(*cred); + *cred = NULL; *cred = g_malloc(strlen(certdir) + strlen(filename) + 2); @@ -935,6 +935,9 @@ static int vnc_update_client(VncState *vs, int has_dirty, bool sync) } vnc_job_push(job); + if (sync) { + vnc_jobs_join(vs); + } vs->force_update = 0; return n; } @@ -2972,10 +2975,8 @@ static void vnc_display_close(DisplayState *ds) if (!vs) return; - if (vs->display) { - g_free(vs->display); - vs->display = NULL; - } + g_free(vs->display); + vs->display = NULL; if (vs->lsock != -1) { qemu_set_fd_handler2(vs->lsock, NULL, NULL, NULL, NULL); close(vs->lsock); @@ -3010,13 +3011,8 @@ int vnc_display_password(DisplayState *ds, const char *password) return -EINVAL; } - if (vs->password) { - g_free(vs->password); - vs->password = NULL; - } - if (password) { - vs->password = g_strdup(password); - } + g_free(vs->password); + vs->password = g_strdup(password); return 0; } diff --git a/util/iov.c b/util/iov.c index 49f88388f8..2b4f46da75 100644 --- a/util/iov.c +++ b/util/iov.c @@ -295,15 +295,15 @@ void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) * of src". 
* Only vector pointers are processed, not the actual data buffers. */ -void qemu_iovec_concat_iov(QEMUIOVector *dst, - struct iovec *src_iov, unsigned int src_cnt, - size_t soffset, size_t sbytes) +size_t qemu_iovec_concat_iov(QEMUIOVector *dst, + struct iovec *src_iov, unsigned int src_cnt, + size_t soffset, size_t sbytes) { int i; size_t done; if (!sbytes) { - return; + return 0; } assert(dst->nalloc != -1); for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) { @@ -317,6 +317,8 @@ void qemu_iovec_concat_iov(QEMUIOVector *dst, } } assert(soffset == 0); /* offset beyond end of src */ + + return done; } /* diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 8e9c770d28..1524ead755 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -46,6 +46,7 @@ extern int daemon(int, int); #else # define QEMU_VMALLOC_ALIGN getpagesize() #endif +#define HUGETLBFS_MAGIC 0x958458f6 #include <termios.h> #include <unistd.h> @@ -58,9 +59,12 @@ extern int daemon(int, int); #include "qemu/sockets.h" #include <sys/mman.h> #include <libgen.h> +#include <setjmp.h> +#include <sys/signal.h> #ifdef CONFIG_LINUX #include <sys/syscall.h> +#include <sys/vfs.h> #endif #ifdef __FreeBSD__ @@ -332,3 +336,72 @@ char *qemu_get_exec_dir(void) { return g_strdup(exec_dir); } + +static sigjmp_buf sigjump; + +static void sigbus_handler(int signal) +{ + siglongjmp(sigjump, 1); +} + +static size_t fd_getpagesize(int fd) +{ +#ifdef CONFIG_LINUX + struct statfs fs; + int ret; + + if (fd != -1) { + do { + ret = fstatfs(fd, &fs); + } while (ret != 0 && errno == EINTR); + + if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) { + return fs.f_bsize; + } + } +#endif + + return getpagesize(); +} + +void os_mem_prealloc(int fd, char *area, size_t memory) +{ + int ret, i; + struct sigaction act, oldact; + sigset_t set, oldset; + size_t hpagesize = fd_getpagesize(fd); + + memset(&act, 0, sizeof(act)); + act.sa_handler = &sigbus_handler; + act.sa_flags = 0; + + ret = sigaction(SIGBUS, &act, &oldact); + if 
(ret) { + perror("os_mem_prealloc: failed to install signal handler"); + exit(1); + } + + /* unblock SIGBUS */ + sigemptyset(&set); + sigaddset(&set, SIGBUS); + pthread_sigmask(SIG_UNBLOCK, &set, &oldset); + + if (sigsetjmp(sigjump, 1)) { + fprintf(stderr, "os_mem_prealloc: failed to preallocate pages\n"); + exit(1); + } + + /* MAP_POPULATE silently ignores failures */ + memory = (memory + hpagesize - 1) & -hpagesize; + for (i = 0; i < (memory/hpagesize); i++) { + memset(area + (hpagesize*i), 0, 1); + } + + ret = sigaction(SIGBUS, &oldact, NULL); + if (ret) { + perror("os_mem_prealloc: failed to reinstall signal handler"); + exit(1); + } + + pthread_sigmask(SIG_SETMASK, &oldset, NULL); +} diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 69552f7ec3..507cedd84d 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -24,6 +24,10 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. + * + * The implementation of g_poll (functions poll_rest, g_poll) at the end of + * this file are based on code from GNOME glib-2 and use a different license, + * see the license comment there. */ #include <windows.h> #include <glib.h> @@ -138,7 +142,7 @@ int inet_aton(const char *cp, struct in_addr *ia) { uint32_t addr = inet_addr(cp); if (addr == 0xffffffff) { - return 0; + return 0; } ia->s_addr = addr; return 1; @@ -240,113 +244,224 @@ char *qemu_get_exec_dir(void) } /* - * g_poll has a problem on Windows when using - * timeouts < 10ms, in glib/gpoll.c: + * The original implementation of g_poll from glib has a problem on Windows + * when using timeouts < 10 ms. + * + * Whenever g_poll is called with timeout < 10 ms, it does a quick poll instead + * of wait. This causes significant performance degradation of QEMU. 
+ * + * The following code is a copy of the original code from glib/gpoll.c + * (glib commit 20f4d1820b8d4d0fc4447188e33efffd6d4a88d8 from 2014-02-19). + * Some debug code was removed and the code was reformatted. + * All other code modifications are marked with 'QEMU'. + */ + +/* + * gpoll.c: poll(2) abstraction + * Copyright 1998 Owen Taylor + * Copyright 2008 Red Hat, Inc. * - * // If not, and we have a significant timeout, poll again with - * // timeout then. Note that this will return indication for only - * // one event, or only for messages. We ignore timeouts less than - * // ten milliseconds as they are mostly pointless on Windows, the - * // MsgWaitForMultipleObjectsEx() call will timeout right away - * // anyway. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. * - * if (retval == 0 && (timeout == INFINITE || timeout >= 10)) - * retval = poll_rest (poll_msgs, handles, nhandles, fds, nfds, timeout); + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * So whenever g_poll is called with timeout < 10ms it does - * a quick poll instead of wait, this causes significant performance - * degradation of QEMU, thus we should use WaitForMultipleObjectsEx - * directly + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ -gint g_poll_fixed(GPollFD *fds, guint nfds, gint timeout) + +static int poll_rest(gboolean poll_msgs, HANDLE *handles, gint nhandles, + GPollFD *fds, guint nfds, gint timeout) { - guint i; - HANDLE handles[MAXIMUM_WAIT_OBJECTS]; - gint nhandles = 0; - int num_completed = 0; + DWORD ready; + GPollFD *f; + int recursed_result; + + if (poll_msgs) { + /* Wait for either messages or handles + * -> Use MsgWaitForMultipleObjectsEx + */ + ready = MsgWaitForMultipleObjectsEx(nhandles, handles, timeout, + QS_ALLINPUT, MWMO_ALERTABLE); + + if (ready == WAIT_FAILED) { + gchar *emsg = g_win32_error_message(GetLastError()); + g_warning("MsgWaitForMultipleObjectsEx failed: %s", emsg); + g_free(emsg); + } + } else if (nhandles == 0) { + /* No handles to wait for, just the timeout */ + if (timeout == INFINITE) { + ready = WAIT_FAILED; + } else { + SleepEx(timeout, TRUE); + ready = WAIT_TIMEOUT; + } + } else { + /* Wait for just handles + * -> Use WaitForMultipleObjectsEx + */ + ready = + WaitForMultipleObjectsEx(nhandles, handles, FALSE, timeout, TRUE); + if (ready == WAIT_FAILED) { + gchar *emsg = g_win32_error_message(GetLastError()); + g_warning("WaitForMultipleObjectsEx failed: %s", emsg); + g_free(emsg); + } + } - for (i = 0; i < nfds; i++) { - gint j; + if (ready == WAIT_FAILED) { + return -1; + } else if (ready == WAIT_TIMEOUT || ready == WAIT_IO_COMPLETION) { + return 0; + } else if (poll_msgs && ready == WAIT_OBJECT_0 + nhandles) { + for (f = fds; f < &fds[nfds]; ++f) { + if (f->fd == G_WIN32_MSG_HANDLE && f->events & G_IO_IN) { + f->revents |= G_IO_IN; + } + } - if (fds[i].fd <= 0) { - continue; + /* If we have a timeout, or no handles to poll, be satisfied + * with just noticing we have messages waiting. + */ + if (timeout != 0 || nhandles == 0) { + return 1; } - /* don't add same handle several times + /* If no timeout and handles to poll, recurse to poll them, + * too. 
*/ - for (j = 0; j < nhandles; j++) { - if (handles[j] == (HANDLE)fds[i].fd) { - break; + recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0); + return (recursed_result == -1) ? -1 : 1 + recursed_result; + } else if (/* QEMU: removed the following unneeded statement which causes + * a compiler warning: ready >= WAIT_OBJECT_0 && */ + ready < WAIT_OBJECT_0 + nhandles) { + for (f = fds; f < &fds[nfds]; ++f) { + if ((HANDLE) f->fd == handles[ready - WAIT_OBJECT_0]) { + f->revents = f->events; } } - if (j == nhandles) { - if (nhandles == MAXIMUM_WAIT_OBJECTS) { - fprintf(stderr, "Too many handles to wait for!\n"); - break; - } else { - handles[nhandles++] = (HANDLE)fds[i].fd; + /* If no timeout and polling several handles, recurse to poll + * the rest of them. + */ + if (timeout == 0 && nhandles > 1) { + /* Remove the handle that fired */ + int i; + if (ready < nhandles - 1) { + for (i = ready - WAIT_OBJECT_0 + 1; i < nhandles; i++) { + handles[i-1] = handles[i]; + } } + nhandles--; + recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0); + return (recursed_result == -1) ? -1 : 1 + recursed_result; } + return 1; } - for (i = 0; i < nfds; ++i) { - fds[i].revents = 0; + return 0; +} + +gint g_poll(GPollFD *fds, guint nfds, gint timeout) +{ + HANDLE handles[MAXIMUM_WAIT_OBJECTS]; + gboolean poll_msgs = FALSE; + GPollFD *f; + gint nhandles = 0; + int retval; + + for (f = fds; f < &fds[nfds]; ++f) { + if (f->fd == G_WIN32_MSG_HANDLE && (f->events & G_IO_IN)) { + poll_msgs = TRUE; + } else if (f->fd > 0) { + /* Don't add the same handle several times into the array, as + * docs say that is not allowed, even if it actually does seem + * to work. 
+ */ + gint i; + + for (i = 0; i < nhandles; i++) { + if (handles[i] == (HANDLE) f->fd) { + break; + } + } + + if (i == nhandles) { + if (nhandles == MAXIMUM_WAIT_OBJECTS) { + g_warning("Too many handles to wait for!\n"); + break; + } else { + handles[nhandles++] = (HANDLE) f->fd; + } + } + } + } + + for (f = fds; f < &fds[nfds]; ++f) { + f->revents = 0; } if (timeout == -1) { timeout = INFINITE; } - if (nhandles == 0) { - if (timeout == INFINITE) { - return -1; - } else { - SleepEx(timeout, TRUE); - return 0; + /* Polling for several things? */ + if (nhandles > 1 || (nhandles > 0 && poll_msgs)) { + /* First check if one or several of them are immediately + * available + */ + retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, 0); + + /* If not, and we have a significant timeout, poll again with + * timeout then. Note that this will return indication for only + * one event, or only for messages. We ignore timeouts less than + * ten milliseconds as they are mostly pointless on Windows, the + * MsgWaitForMultipleObjectsEx() call will timeout right away + * anyway. + * + * Modification for QEMU: replaced timeout >= 10 by timeout > 0. 
+ */ + if (retval == 0 && (timeout == INFINITE || timeout > 0)) { + retval = poll_rest(poll_msgs, handles, nhandles, + fds, nfds, timeout); } + } else { + /* Just polling for one thing, so no need to check first if + * available immediately + */ + retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, timeout); } - while (1) { - DWORD res; - gint j; - - res = WaitForMultipleObjectsEx(nhandles, handles, FALSE, - timeout, TRUE); - - if (res == WAIT_FAILED) { - for (i = 0; i < nfds; ++i) { - fds[i].revents = 0; - } - - return -1; - } else if ((res == WAIT_TIMEOUT) || (res == WAIT_IO_COMPLETION) || - ((int)res < (int)WAIT_OBJECT_0) || - (res >= (WAIT_OBJECT_0 + nhandles))) { - break; + if (retval == -1) { + for (f = fds; f < &fds[nfds]; ++f) { + f->revents = 0; } + } - for (i = 0; i < nfds; ++i) { - if (handles[res - WAIT_OBJECT_0] == (HANDLE)fds[i].fd) { - fds[i].revents = fds[i].events; - } - } + return retval; +} - ++num_completed; +size_t getpagesize(void) +{ + SYSTEM_INFO system_info; - if (nhandles <= 1) { - break; - } + GetSystemInfo(&system_info); + return system_info.dwPageSize; +} - /* poll the rest of the handles - */ - for (j = res - WAIT_OBJECT_0 + 1; j < nhandles; j++) { - handles[j - 1] = handles[j]; - } - --nhandles; +void os_mem_prealloc(int fd, char *area, size_t memory) +{ + int i; + size_t pagesize = getpagesize(); - timeout = 0; + memory = (memory + pagesize - 1) & -pagesize; + for (i = 0; i < memory / pagesize; i++) { + memset(area + pagesize * i, 0, 1); } - - return num_completed; } @@ -116,7 +116,7 @@ int main(int argc, char **argv) #include "ui/qemu-spice.h" #include "qapi/string-input-visitor.h" -#include "qom/object_interfaces.h" +#include "qapi/opts-visitor.h" #define DEFAULT_RAM_SIZE 128 @@ -195,8 +195,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order); int nb_numa_nodes; -uint64_t node_mem[MAX_NODES]; -unsigned long *node_cpumask[MAX_NODES]; +NodeInfo numa_info[MAX_NODES]; uint8_t 
qemu_uuid[16]; bool qemu_uuid_set; @@ -520,6 +519,14 @@ static QemuOptsList qemu_mem_opts = { .name = "size", .type = QEMU_OPT_SIZE, }, + { + .name = "slots", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "maxmem", + .type = QEMU_OPT_SIZE, + }, { /* end of list */ } }, }; @@ -1267,102 +1274,6 @@ char *get_boot_devices_list(size_t *size, bool ignore_suffixes) return list; } -static void numa_node_parse_cpus(int nodenr, const char *cpus) -{ - char *endptr; - unsigned long long value, endvalue; - - /* Empty CPU range strings will be considered valid, they will simply - * not set any bit in the CPU bitmap. - */ - if (!*cpus) { - return; - } - - if (parse_uint(cpus, &value, &endptr, 10) < 0) { - goto error; - } - if (*endptr == '-') { - if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) { - goto error; - } - } else if (*endptr == '\0') { - endvalue = value; - } else { - goto error; - } - - if (endvalue >= MAX_CPUMASK_BITS) { - endvalue = MAX_CPUMASK_BITS - 1; - fprintf(stderr, - "qemu: NUMA: A max of %d VCPUs are supported\n", - MAX_CPUMASK_BITS); - } - - if (endvalue < value) { - goto error; - } - - bitmap_set(node_cpumask[nodenr], value, endvalue-value+1); - return; - -error: - fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus); - exit(1); -} - -static void numa_add(const char *optarg) -{ - char option[128]; - char *endptr; - unsigned long long nodenr; - - optarg = get_opt_name(option, 128, optarg, ','); - if (*optarg == ',') { - optarg++; - } - if (!strcmp(option, "node")) { - - if (nb_numa_nodes >= MAX_NODES) { - fprintf(stderr, "qemu: too many NUMA nodes\n"); - exit(1); - } - - if (get_param_value(option, 128, "nodeid", optarg) == 0) { - nodenr = nb_numa_nodes; - } else { - if (parse_uint_full(option, &nodenr, 10) < 0) { - fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option); - exit(1); - } - } - - if (nodenr >= MAX_NODES) { - fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr); - exit(1); - } - - if (get_param_value(option, 128, "mem", 
optarg) == 0) { - node_mem[nodenr] = 0; - } else { - int64_t sval; - sval = strtosz(option, &endptr); - if (sval < 0 || *endptr) { - fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg); - exit(1); - } - node_mem[nodenr] = sval; - } - if (get_param_value(option, 128, "cpus", optarg) != 0) { - numa_node_parse_cpus(nodenr, option); - } - nb_numa_nodes++; - } else { - fprintf(stderr, "Invalid -numa option: %s\n", option); - exit(1); - } -} - static QemuOptsList qemu_smp_opts = { .name = "smp-opts", .implied_opt_name = "cpus", @@ -2911,43 +2822,51 @@ static int object_set_property(const char *name, const char *value, void *opaque static int object_create(QemuOpts *opts, void *opaque) { - const char *type = qemu_opt_get(opts, "qom-type"); - const char *id = qemu_opts_id(opts); - Error *local_err = NULL; - Object *obj; + Error *err = NULL; + char *type = NULL; + char *id = NULL; + void *dummy = NULL; + OptsVisitor *ov; + QDict *pdict; - g_assert(type != NULL); + ov = opts_visitor_new(opts); + pdict = qemu_opts_to_qdict(opts, NULL); - if (id == NULL) { - qerror_report(QERR_MISSING_PARAMETER, "id"); - return -1; + visit_start_struct(opts_get_visitor(ov), &dummy, NULL, NULL, 0, &err); + if (err) { + goto out; } - obj = object_new(type); - if (qemu_opt_foreach(opts, object_set_property, obj, 1) < 0) { - object_unref(obj); - return -1; + qdict_del(pdict, "qom-type"); + visit_type_str(opts_get_visitor(ov), &type, "qom-type", &err); + if (err) { + goto out; } - if (!object_dynamic_cast(obj, TYPE_USER_CREATABLE)) { - error_setg(&local_err, "object '%s' isn't supported by -object", - id); + qdict_del(pdict, "id"); + visit_type_str(opts_get_visitor(ov), &id, "id", &err); + if (err) { goto out; } - user_creatable_complete(obj, &local_err); - if (local_err) { + object_add(type, id, pdict, opts_get_visitor(ov), &err); + if (err) { goto out; } - - object_property_add_child(container_get(object_get_root(), "/objects"), - id, obj, &local_err); + 
visit_end_struct(opts_get_visitor(ov), &err); + if (err) { + qmp_object_del(id, NULL); + } out: - object_unref(obj); - if (local_err) { - qerror_report_err(local_err); - error_free(local_err); + opts_visitor_cleanup(ov); + + QDECREF(pdict); + g_free(id); + g_free(type); + g_free(dummy); + if (err) { + qerror_report_err(err); return -1; } return 0; @@ -2991,6 +2910,8 @@ int main(int argc, char **argv, char **envp) const char *trace_file = NULL; const ram_addr_t default_ram_size = (ram_addr_t)DEFAULT_RAM_SIZE * 1024 * 1024; + ram_addr_t maxram_size = default_ram_size; + uint64_t ram_slots = 0; atexit(qemu_run_exit_notifiers); error_set_progname(argv[0]); @@ -3024,6 +2945,7 @@ int main(int argc, char **argv, char **envp) qemu_add_opts(&qemu_realtime_opts); qemu_add_opts(&qemu_msg_opts); qemu_add_opts(&qemu_name_opts); + qemu_add_opts(&qemu_numa_opts); runstate_init(); @@ -3044,8 +2966,8 @@ int main(int argc, char **argv, char **envp) translation = BIOS_ATA_TRANSLATION_AUTO; for (i = 0; i < MAX_NODES; i++) { - node_mem[i] = 0; - node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS); + numa_info[i].node_mem = 0; + bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS); } nb_numa_nodes = 0; @@ -3219,7 +3141,10 @@ int main(int argc, char **argv, char **envp) } break; case QEMU_OPTION_numa: - numa_add(optarg); + opts = qemu_opts_parse(qemu_find_opts("numa"), optarg, 1); + if (!opts) { + exit(1); + } break; case QEMU_OPTION_display: display_type = select_display(optarg); @@ -3326,6 +3251,7 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_m: { uint64_t sz; const char *mem_str; + const char *maxmem_str, *slots_str; opts = qemu_opts_parse(qemu_find_opts("memory"), optarg, 1); @@ -3367,6 +3293,44 @@ int main(int argc, char **argv, char **envp) error_report("ram size too large"); exit(EXIT_FAILURE); } + + maxmem_str = qemu_opt_get(opts, "maxmem"); + slots_str = qemu_opt_get(opts, "slots"); + if (maxmem_str && slots_str) { + uint64_t slots; + + sz = qemu_opt_get_size(opts, 
"maxmem", 0); + if (sz < ram_size) { + fprintf(stderr, "qemu: invalid -m option value: maxmem " + "(%" PRIu64 ") <= initial memory (" + RAM_ADDR_FMT ")\n", sz, ram_size); + exit(EXIT_FAILURE); + } + + slots = qemu_opt_get_number(opts, "slots", 0); + if ((sz > ram_size) && !slots) { + fprintf(stderr, "qemu: invalid -m option value: maxmem " + "(%" PRIu64 ") more than initial memory (" + RAM_ADDR_FMT ") but no hotplug slots where " + "specified\n", sz, ram_size); + exit(EXIT_FAILURE); + } + + if ((sz <= ram_size) && slots) { + fprintf(stderr, "qemu: invalid -m option value: %" + PRIu64 " hotplug slots where specified but " + "maxmem (%" PRIu64 ") <= initial memory (" + RAM_ADDR_FMT ")\n", slots, sz, ram_size); + exit(EXIT_FAILURE); + } + maxram_size = sz; + ram_slots = slots; + } else if ((!maxmem_str && slots_str) || + (maxmem_str && !slots_str)) { + fprintf(stderr, "qemu: invalid -m option value: missing " + "'%s' option\n", slots_str ? "maxmem" : "slots"); + exit(EXIT_FAILURE); + } break; } #ifdef CONFIG_TPM @@ -3964,6 +3928,8 @@ int main(int argc, char **argv, char **envp) } loc_set_none(); + os_daemonize(); + if (qemu_init_main_loop()) { fprintf(stderr, "qemu_init_main_loop failed\n"); exit(1); @@ -3993,6 +3959,8 @@ int main(int argc, char **argv, char **envp) exit(1); } + cpu_exec_init_all(); + current_machine = MACHINE(object_new(object_class_get_name( OBJECT_CLASS(machine_class)))); object_property_add_child(object_get_root(), "machine", @@ -4205,8 +4173,6 @@ int main(int argc, char **argv, char **envp) } #endif - os_daemonize(); - if (pid_file && qemu_create_pidfile(pid_file) != 0) { os_pidfile_error(); exit(1); @@ -4332,8 +4298,6 @@ int main(int argc, char **argv, char **envp) } } - cpu_exec_init_all(); - blk_mig_init(); ram_mig_init(); @@ -4350,49 +4314,13 @@ int main(int argc, char **argv, char **envp) default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); - if (nb_numa_nodes > 0) { - 
int i; - - if (nb_numa_nodes > MAX_NODES) { - nb_numa_nodes = MAX_NODES; - } - - /* If no memory size if given for any node, assume the default case - * and distribute the available memory equally across all nodes - */ - for (i = 0; i < nb_numa_nodes; i++) { - if (node_mem[i] != 0) - break; - } - if (i == nb_numa_nodes) { - uint64_t usedmem = 0; - - /* On Linux, the each node's border has to be 8MB aligned, - * the final node gets the rest. - */ - for (i = 0; i < nb_numa_nodes - 1; i++) { - node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1); - usedmem += node_mem[i]; - } - node_mem[i] = ram_size - usedmem; - } - - for (i = 0; i < nb_numa_nodes; i++) { - if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) { - break; - } - } - /* assigning the VCPUs round-robin is easier to implement, guest OSes - * must cope with this anyway, because there are BIOSes out there in - * real machines which also use this scheme. - */ - if (i == nb_numa_nodes) { - for (i = 0; i < max_cpus; i++) { - set_bit(i, node_cpumask[i % nb_numa_nodes]); - } - } + if (qemu_opts_foreach(qemu_find_opts("numa"), numa_init_func, + NULL, 1) != 0) { + exit(1); } + set_numa_nodes(); + if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) { exit(1); } @@ -4435,6 +4363,8 @@ int main(int argc, char **argv, char **envp) qdev_machine_init(); current_machine->ram_size = ram_size; + current_machine->maxram_size = maxram_size; + current_machine->ram_slots = ram_slots; current_machine->boot_order = boot_order; current_machine->cpu_model = cpu_model; |