aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2021-06-17 15:43:26 +0100
committerPeter Maydell <peter.maydell@linaro.org>2021-06-17 15:43:26 +0100
commitb6d73e9cb1c620960ca7d864ee0725f8a55fe778 (patch)
treebfea4ab0f516d9795a165b29640dc23b8c12b1b8
parent18e53dff939898c6dd00d206a3c2f5cd3d6669db (diff)
parentf51f8e3591393f7f274e1435ac22188e2dafdfe8 (diff)
Merge remote-tracking branch 'remotes/bonzini-gitlab/tags/for-upstream' into staging
* avoid deprecation warnings for SASL on macOS 10.11 or newer * fix -readconfig when config blocks have an id (like [chardev "qmp"]) * Error* initialization fixes * Improvements to ESP emulation (Mark) * Allow creating noreserve memory backends (David) * Improvements to query-memdev (David) * Bump compiler to C11 (Richard) * First round of SVM fixes from GSoC project (Lara) # gpg: Signature made Wed 16 Jun 2021 16:37:49 BST # gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83 # gpg: issuer "pbonzini@redhat.com" # gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full] # gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full] # Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1 # Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83 * remotes/bonzini-gitlab/tags/for-upstream: (45 commits) configure: Remove probe for _Static_assert qemu/compiler: Remove QEMU_GENERIC include/qemu/lockable: Use _Generic instead of QEMU_GENERIC util: Use unique type for QemuRecMutex in thread-posix.h util: Pass file+line to qemu_rec_mutex_unlock_impl util: Use real functions for thread-posix QemuRecMutex softfloat: Use _Generic instead of QEMU_GENERIC configure: Use -std=gnu11 target/i386: Added Intercept CR0 writes check target/i386: Added consistency checks for CR0 target/i386: Added consistency checks for VMRUN intercept and ASID target/i386: Refactored intercept checks into cpu_svm_has_intercept configure: map x32 to cpu_family x86_64 for meson hmp: Print "reserve" property of memory backends with "info memdev" qmp: Include "reserve" property of memory backends hmp: Print "share" property of memory backends with "info memdev" qmp: Include "share" property of memory backends qmp: Clarify memory backend properties returned via query-memdev hostmem: Wire up RAM_NORESERVE via "reserve" property util/mmap-alloc: Support RAM_NORESERVE via MAP_NORESERVE under Linux ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--backends/hostmem-file.c11
-rw-r--r--backends/hostmem-memfd.c8
-rw-r--r--backends/hostmem-ram.c7
-rw-r--r--backends/hostmem.c36
-rwxr-xr-xconfigure2
-rw-r--r--hw/core/machine-hmp-cmds.c6
-rw-r--r--hw/core/machine-qmp-cmds.c8
-rw-r--r--hw/m68k/next-cube.c4
-rw-r--r--hw/misc/ivshmem.c5
-rw-r--r--hw/scsi/esp.c199
-rw-r--r--hw/scsi/scsi-generic.c9
-rw-r--r--hw/scsi/trace-events3
-rw-r--r--include/exec/cpu-common.h1
-rw-r--r--include/exec/memory.h42
-rw-r--r--include/exec/ram_addr.h9
-rw-r--r--include/hw/scsi/esp.h1
-rw-r--r--include/qemu/mmap-alloc.h16
-rw-r--r--include/qemu/osdep.h34
-rw-r--r--include/sysemu/hostmem.h2
-rw-r--r--migration/ram.c3
-rw-r--r--qapi/machine.json16
-rw-r--r--qapi/qom.json10
-rw-r--r--scripts/coccinelle/memory-region-housekeeping.cocci8
-rw-r--r--softmmu/memory.c27
-rw-r--r--softmmu/physmem.c68
-rw-r--r--softmmu/runstate.c2
-rw-r--r--softmmu/vl.c2
-rw-r--r--target/i386/cpu.h6
-rw-r--r--target/i386/svm.h2
-rw-r--r--target/i386/tcg/sysemu/misc_helper.c9
-rw-r--r--target/i386/tcg/sysemu/svm_helper.c127
-rw-r--r--ui/vnc-auth-sasl.c20
-rw-r--r--ui/vnc-auth-sasl.h1
-rw-r--r--ui/vnc.c10
-rw-r--r--util/mmap-alloc.c212
-rw-r--r--util/oslib-posix.c7
-rw-r--r--util/oslib-win32.c13
-rw-r--r--util/qemu-config.c17
38 files changed, 645 insertions, 318 deletions
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 9b1b9f0a56..cd038024fa 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -39,6 +39,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
object_get_typename(OBJECT(backend)));
#else
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend);
+ uint32_t ram_flags;
gchar *name;
if (!backend->size) {
@@ -51,11 +52,11 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
}
name = host_memory_backend_get_name(backend);
- memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
- name,
- backend->size, fb->align,
- (backend->share ? RAM_SHARED : 0) |
- (fb->is_pmem ? RAM_PMEM : 0),
+ ram_flags = backend->share ? RAM_SHARED : 0;
+ ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
+ ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
+ memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
+ backend->size, fb->align, ram_flags,
fb->mem_path, fb->readonly, errp);
g_free(name);
#endif
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
index da75e27057..3fc85c3db8 100644
--- a/backends/hostmem-memfd.c
+++ b/backends/hostmem-memfd.c
@@ -35,6 +35,7 @@ static void
memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
{
HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(backend);
+ uint32_t ram_flags;
char *name;
int fd;
@@ -52,9 +53,10 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
}
name = host_memory_backend_get_name(backend);
- memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend),
- name, backend->size,
- backend->share, fd, 0, errp);
+ ram_flags = backend->share ? RAM_SHARED : 0;
+ ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
+ memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
+ backend->size, ram_flags, fd, 0, errp);
g_free(name);
}
diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
index 5cc53e76c9..b8e55cdbd0 100644
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -19,6 +19,7 @@
static void
ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
{
+ uint32_t ram_flags;
char *name;
if (!backend->size) {
@@ -27,8 +28,10 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
}
name = host_memory_backend_get_name(backend);
- memory_region_init_ram_shared_nomigrate(&backend->mr, OBJECT(backend), name,
- backend->size, backend->share, errp);
+ ram_flags = backend->share ? RAM_SHARED : 0;
+ ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
+ memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), name,
+ backend->size, ram_flags, errp);
g_free(name);
}
diff --git a/backends/hostmem.c b/backends/hostmem.c
index aab3de8408..4c05862ed5 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -216,6 +216,11 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
Error *local_err = NULL;
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+ if (!backend->reserve && value) {
+ error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
+ return;
+ }
+
if (!host_memory_backend_mr_inited(backend)) {
backend->prealloc = value;
return;
@@ -267,6 +272,7 @@ static void host_memory_backend_init(Object *obj)
/* TODO: convert access to globals to compat properties */
backend->merge = machine_mem_merge(machine);
backend->dump = machine_dump_guest_core(machine);
+ backend->reserve = true;
backend->prealloc_threads = 1;
}
@@ -425,6 +431,30 @@ static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
backend->share = value;
}
+#ifdef CONFIG_LINUX
+static bool host_memory_backend_get_reserve(Object *o, Error **errp)
+{
+ HostMemoryBackend *backend = MEMORY_BACKEND(o);
+
+ return backend->reserve;
+}
+
+static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp)
+{
+ HostMemoryBackend *backend = MEMORY_BACKEND(o);
+
+ if (host_memory_backend_mr_inited(backend)) {
+ error_setg(errp, "cannot change property value");
+ return;
+ }
+ if (backend->prealloc && !value) {
+ error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
+ return;
+ }
+ backend->reserve = value;
+}
+#endif /* CONFIG_LINUX */
+
static bool
host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
{
@@ -493,6 +523,12 @@ host_memory_backend_class_init(ObjectClass *oc, void *data)
host_memory_backend_get_share, host_memory_backend_set_share);
object_class_property_set_description(oc, "share",
"Mark the memory as private to QEMU or shared");
+#ifdef CONFIG_LINUX
+ object_class_property_add_bool(oc, "reserve",
+ host_memory_backend_get_reserve, host_memory_backend_set_reserve);
+ object_class_property_set_description(oc, "reserve",
+ "Reserve swap space (or huge pages) if applicable");
+#endif /* CONFIG_LINUX */
/*
* Do not delete/rename option. This option must be considered stable
* (as if it didn't have the 'x-' prefix including deprecation period) as
diff --git a/configure b/configure
index debd50c085..262ab71802 100755
--- a/configure
+++ b/configure
@@ -6366,7 +6366,7 @@ if test "$skip_meson" = no; then
i386)
echo "cpu_family = 'x86'" >> $cross
;;
- x86_64)
+ x86_64|x32)
echo "cpu_family = 'x86_64'" >> $cross
;;
ppc64le)
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index 58248cffa3..76b22b00d6 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -110,6 +110,12 @@ void hmp_info_memdev(Monitor *mon, const QDict *qdict)
m->value->dump ? "true" : "false");
monitor_printf(mon, " prealloc: %s\n",
m->value->prealloc ? "true" : "false");
+ monitor_printf(mon, " share: %s\n",
+ m->value->share ? "true" : "false");
+ if (m->value->has_reserve) {
+ monitor_printf(mon, " reserve: %s\n",
+ m->value->reserve ? "true" : "false");
+ }
monitor_printf(mon, " policy: %s\n",
HostMemPolicy_str(m->value->policy));
visit_complete(v, &str);
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index a36c96608f..216fdfaf3a 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -157,6 +157,7 @@ void qmp_set_numa_node(NumaOptions *cmd, Error **errp)
static int query_memdev(Object *obj, void *opaque)
{
+ Error *err = NULL;
MemdevList **list = opaque;
Memdev *m;
QObject *host_nodes;
@@ -172,6 +173,13 @@ static int query_memdev(Object *obj, void *opaque)
m->merge = object_property_get_bool(obj, "merge", &error_abort);
m->dump = object_property_get_bool(obj, "dump", &error_abort);
m->prealloc = object_property_get_bool(obj, "prealloc", &error_abort);
+ m->share = object_property_get_bool(obj, "share", &error_abort);
+ m->reserve = object_property_get_bool(obj, "reserve", &err);
+ if (err) {
+ error_free_or_abort(&err);
+ } else {
+ m->has_reserve = true;
+ }
m->policy = object_property_get_enum(obj, "policy", "HostMemPolicy",
&error_abort);
host_nodes = object_property_get_qobject(obj,
diff --git a/hw/m68k/next-cube.c b/hw/m68k/next-cube.c
index de951ffe5d..e0d4a94f9d 100644
--- a/hw/m68k/next-cube.c
+++ b/hw/m68k/next-cube.c
@@ -984,8 +984,8 @@ static void next_cube_init(MachineState *machine)
sysbus_mmio_map(SYS_BUS_DEVICE(pcdev), 1, 0x02100000);
/* BMAP memory */
- memory_region_init_ram_shared_nomigrate(bmapm1, NULL, "next.bmapmem", 64,
- true, &error_fatal);
+ memory_region_init_ram_flags_nomigrate(bmapm1, NULL, "next.bmapmem", 64,
+ RAM_SHARED, &error_fatal);
memory_region_add_subregion(sysmem, 0x020c0000, bmapm1);
/* The Rev_2.5_v66.bin firmware accesses it at 0x820c0020, too */
memory_region_init_alias(bmapm2, NULL, "next.bmapmem2", bmapm1, 0x0, 64);
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index a1fa4878be..1ba4a98377 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -493,9 +493,8 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
size = buf.st_size;
/* mmap the region and map into the BAR2 */
- memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
- "ivshmem.bar2", size, true, fd, 0,
- &local_err);
+ memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), "ivshmem.bar2",
+ size, RAM_SHARED, fd, 0, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c
index b668acef82..4ac2114788 100644
--- a/hw/scsi/esp.c
+++ b/hw/scsi/esp.c
@@ -213,7 +213,7 @@ static int esp_select(ESPState *s)
if (!s->current_dev) {
/* No such drive */
s->rregs[ESP_RSTAT] = 0;
- s->rregs[ESP_RINTR] |= INTR_DC;
+ s->rregs[ESP_RINTR] = INTR_DC;
s->rregs[ESP_RSEQ] = SEQ_0;
esp_raise_irq(s);
return -1;
@@ -221,7 +221,7 @@ static int esp_select(ESPState *s)
/*
* Note that we deliberately don't raise the IRQ here: this will be done
- * either in do_busid_cmd() for DATA OUT transfers or by the deferred
+ * either in do_command_phase() for DATA OUT transfers or by the deferred
* IRQ mechanism in esp_transfer_data() for DATA IN transfers
*/
s->rregs[ESP_RINTR] |= INTR_FC;
@@ -260,9 +260,6 @@ static uint32_t get_cmd(ESPState *s, uint32_t maxlen)
return 0;
}
n = esp_fifo_pop_buf(&s->fifo, buf, dmalen);
- if (n >= 3) {
- buf[0] = buf[2] >> 5;
- }
n = MIN(fifo8_num_free(&s->cmdfifo), n);
fifo8_push_all(&s->cmdfifo, buf, n);
}
@@ -275,24 +272,22 @@ static uint32_t get_cmd(ESPState *s, uint32_t maxlen)
return dmalen;
}
-static void do_busid_cmd(ESPState *s, uint8_t busid)
+static void do_command_phase(ESPState *s)
{
uint32_t cmdlen;
int32_t datalen;
- int lun;
SCSIDevice *current_lun;
uint8_t buf[ESP_CMDFIFO_SZ];
- trace_esp_do_busid_cmd(busid);
- lun = busid & 7;
+ trace_esp_do_command_phase(s->lun);
cmdlen = fifo8_num_used(&s->cmdfifo);
if (!cmdlen || !s->current_dev) {
return;
}
esp_fifo_pop_buf(&s->cmdfifo, buf, cmdlen);
- current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, lun);
- s->current_req = scsi_req_new(current_lun, 0, lun, buf, s);
+ current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, s->lun);
+ s->current_req = scsi_req_new(current_lun, 0, s->lun, buf, s);
datalen = scsi_req_enqueue(s->current_req);
s->ti_size = datalen;
fifo8_reset(&s->cmdfifo);
@@ -319,28 +314,36 @@ static void do_busid_cmd(ESPState *s, uint8_t busid)
}
}
-static void do_cmd(ESPState *s)
+static void do_message_phase(ESPState *s)
{
- uint8_t busid = esp_fifo_pop(&s->cmdfifo);
- int len;
+ if (s->cmdfifo_cdb_offset) {
+ uint8_t message = esp_fifo_pop(&s->cmdfifo);
- s->cmdfifo_cdb_offset--;
+ trace_esp_do_identify(message);
+ s->lun = message & 7;
+ s->cmdfifo_cdb_offset--;
+ }
/* Ignore extended messages for now */
if (s->cmdfifo_cdb_offset) {
- len = MIN(s->cmdfifo_cdb_offset, fifo8_num_used(&s->cmdfifo));
+ int len = MIN(s->cmdfifo_cdb_offset, fifo8_num_used(&s->cmdfifo));
esp_fifo_pop_buf(&s->cmdfifo, NULL, len);
s->cmdfifo_cdb_offset = 0;
}
+}
- do_busid_cmd(s, busid);
+static void do_cmd(ESPState *s)
+{
+ do_message_phase(s);
+ assert(s->cmdfifo_cdb_offset == 0);
+ do_command_phase(s);
}
static void satn_pdma_cb(ESPState *s)
{
- s->do_cmd = 0;
- if (!fifo8_is_empty(&s->cmdfifo)) {
+ if (!esp_get_tc(s) && !fifo8_is_empty(&s->cmdfifo)) {
s->cmdfifo_cdb_offset = 1;
+ s->do_cmd = 0;
do_cmd(s);
}
}
@@ -369,13 +372,10 @@ static void handle_satn(ESPState *s)
static void s_without_satn_pdma_cb(ESPState *s)
{
- uint32_t len;
-
- s->do_cmd = 0;
- len = fifo8_num_used(&s->cmdfifo);
- if (len) {
+ if (!esp_get_tc(s) && !fifo8_is_empty(&s->cmdfifo)) {
s->cmdfifo_cdb_offset = 0;
- do_busid_cmd(s, 0);
+ s->do_cmd = 0;
+ do_cmd(s);
}
}
@@ -392,7 +392,7 @@ static void handle_s_without_atn(ESPState *s)
if (cmdlen > 0) {
s->cmdfifo_cdb_offset = 0;
s->do_cmd = 0;
- do_busid_cmd(s, 0);
+ do_cmd(s);
} else if (cmdlen == 0) {
s->do_cmd = 1;
/* Target present, but no cmd yet - switch to command phase */
@@ -403,8 +403,7 @@ static void handle_s_without_atn(ESPState *s)
static void satn_stop_pdma_cb(ESPState *s)
{
- s->do_cmd = 0;
- if (!fifo8_is_empty(&s->cmdfifo)) {
+ if (!esp_get_tc(s) && !fifo8_is_empty(&s->cmdfifo)) {
trace_esp_handle_satn_stop(fifo8_num_used(&s->cmdfifo));
s->do_cmd = 1;
s->cmdfifo_cdb_offset = 1;
@@ -481,7 +480,6 @@ static void esp_dma_done(ESPState *s)
{
s->rregs[ESP_RSTAT] |= STAT_TC;
s->rregs[ESP_RINTR] |= INTR_BS;
- s->rregs[ESP_RSEQ] = 0;
s->rregs[ESP_RFLAGS] = 0;
esp_set_tc(s, 0);
esp_raise_irq(s);
@@ -494,10 +492,32 @@ static void do_dma_pdma_cb(ESPState *s)
uint32_t n;
if (s->do_cmd) {
+ /* Ensure we have received complete command after SATN and stop */
+ if (esp_get_tc(s) || fifo8_is_empty(&s->cmdfifo)) {
+ return;
+ }
+
s->ti_size = 0;
- s->do_cmd = 0;
- do_cmd(s);
- esp_lower_drq(s);
+ if ((s->rregs[ESP_RSTAT] & 7) == STAT_CD) {
+ /* No command received */
+ if (s->cmdfifo_cdb_offset == fifo8_num_used(&s->cmdfifo)) {
+ return;
+ }
+
+ /* Command has been received */
+ s->do_cmd = 0;
+ do_cmd(s);
+ } else {
+ /*
+ * Extra message out bytes received: update cmdfifo_cdb_offset
+ * and then switch to commmand phase
+ */
+ s->cmdfifo_cdb_offset = fifo8_num_used(&s->cmdfifo);
+ s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD;
+ s->rregs[ESP_RSEQ] = SEQ_CD;
+ s->rregs[ESP_RINTR] |= INTR_BS;
+ esp_raise_irq(s);
+ }
return;
}
@@ -740,20 +760,17 @@ static void esp_do_nodma(ESPState *s)
s->async_len -= len;
s->ti_size += len;
} else {
- len = MIN(s->ti_size, s->async_len);
- len = MIN(len, fifo8_num_free(&s->fifo));
- fifo8_push_all(&s->fifo, s->async_buf, len);
- s->async_buf += len;
- s->async_len -= len;
- s->ti_size -= len;
+ if (fifo8_is_empty(&s->fifo)) {
+ fifo8_push(&s->fifo, s->async_buf[0]);
+ s->async_buf++;
+ s->async_len--;
+ s->ti_size--;
+ }
}
if (s->async_len == 0) {
scsi_req_continue(s->current_req);
-
- if (to_device || s->ti_size == 0) {
- return;
- }
+ return;
}
s->rregs[ESP_RINTR] |= INTR_BS;
@@ -763,20 +780,37 @@ static void esp_do_nodma(ESPState *s)
void esp_command_complete(SCSIRequest *req, size_t resid)
{
ESPState *s = req->hba_private;
+ int to_device = ((s->rregs[ESP_RSTAT] & 7) == STAT_DO);
trace_esp_command_complete();
- if (s->ti_size != 0) {
- trace_esp_command_complete_unexpected();
+
+ /*
+ * Non-DMA transfers from the target will leave the last byte in
+ * the FIFO so don't reset ti_size in this case
+ */
+ if (s->dma || to_device) {
+ if (s->ti_size != 0) {
+ trace_esp_command_complete_unexpected();
+ }
+ s->ti_size = 0;
}
- s->ti_size = 0;
+
s->async_len = 0;
if (req->status) {
trace_esp_command_complete_fail();
}
s->status = req->status;
- s->rregs[ESP_RSTAT] = STAT_ST;
- esp_dma_done(s);
- esp_lower_drq(s);
+
+ /*
+ * If the transfer is finished, switch to status phase. For non-DMA
+ * transfers from the target the last byte is still in the FIFO
+ */
+ if (s->ti_size == 0) {
+ s->rregs[ESP_RSTAT] = STAT_TC | STAT_ST;
+ esp_dma_done(s);
+ esp_lower_drq(s);
+ }
+
if (s->current_req) {
scsi_req_unref(s->current_req);
s->current_req = NULL;
@@ -804,16 +838,6 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len)
s->rregs[ESP_RSTAT] |= STAT_TC;
s->rregs[ESP_RINTR] |= INTR_BS;
esp_raise_irq(s);
-
- /*
- * If data is ready to transfer and the TI command has already
- * been executed, start DMA immediately. Otherwise DMA will start
- * when host sends the TI command
- */
- if (s->ti_size && (s->rregs[ESP_CMD] == (CMD_TI | CMD_DMA))) {
- esp_do_dma(s);
- }
- return;
}
if (s->ti_cmd == 0) {
@@ -827,7 +851,7 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len)
return;
}
- if (s->ti_cmd & CMD_DMA) {
+ if (s->ti_cmd == (CMD_TI | CMD_DMA)) {
if (dmalen) {
esp_do_dma(s);
} else if (s->ti_size <= 0) {
@@ -838,7 +862,7 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len)
esp_dma_done(s);
esp_lower_drq(s);
}
- } else {
+ } else if (s->ti_cmd == CMD_TI) {
esp_do_nodma(s);
}
}
@@ -905,6 +929,17 @@ uint64_t esp_reg_read(ESPState *s, uint32_t saddr)
qemu_log_mask(LOG_UNIMP, "esp: PIO data read not implemented\n");
s->rregs[ESP_FIFO] = 0;
} else {
+ if ((s->rregs[ESP_RSTAT] & 0x7) == STAT_DI) {
+ if (s->ti_size) {
+ esp_do_nodma(s);
+ } else {
+ /*
+ * The last byte of a non-DMA transfer has been read out
+ * of the FIFO so switch to status phase
+ */
+ s->rregs[ESP_RSTAT] = STAT_TC | STAT_ST;
+ }
+ }
s->rregs[ESP_FIFO] = esp_fifo_pop(&s->fifo);
}
val = s->rregs[ESP_FIFO];
@@ -917,7 +952,15 @@ uint64_t esp_reg_read(ESPState *s, uint32_t saddr)
val = s->rregs[ESP_RINTR];
s->rregs[ESP_RINTR] = 0;
s->rregs[ESP_RSTAT] &= ~STAT_TC;
- s->rregs[ESP_RSEQ] = SEQ_0;
+ /*
+ * According to the datasheet ESP_RSEQ should be cleared, but as the
+ * emulation currently defers information transfers to the next TI
+ * command leave it for now so that pedantic guests such as the old
+ * Linux 2.6 driver see the correct flags before the next SCSI phase
+ * transition.
+ *
+ * s->rregs[ESP_RSEQ] = SEQ_0;
+ */
esp_lower_irq(s);
break;
case ESP_TCHI:
@@ -955,15 +998,18 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val)
case ESP_FIFO:
if (s->do_cmd) {
esp_fifo_push(&s->cmdfifo, val);
+
+ /*
+ * If any unexpected message out/command phase data is
+ * transferred using non-DMA, raise the interrupt
+ */
+ if (s->rregs[ESP_CMD] == CMD_TI) {
+ s->rregs[ESP_RINTR] |= INTR_BS;
+ esp_raise_irq(s);
+ }
} else {
esp_fifo_push(&s->fifo, val);
}
-
- /* Non-DMA transfers raise an interrupt after every byte */
- if (s->rregs[ESP_CMD] == CMD_TI) {
- s->rregs[ESP_RINTR] |= INTR_FC | INTR_BS;
- esp_raise_irq(s);
- }
break;
case ESP_CMD:
s->rregs[saddr] = val;
@@ -1088,7 +1134,15 @@ static bool esp_is_version_5(void *opaque, int version_id)
ESPState *s = ESP(opaque);
version_id = MIN(version_id, s->mig_version_id);
- return version_id == 5;
+ return version_id >= 5;
+}
+
+static bool esp_is_version_6(void *opaque, int version_id)
+{
+ ESPState *s = ESP(opaque);
+
+ version_id = MIN(version_id, s->mig_version_id);
+ return version_id >= 6;
}
int esp_pre_save(void *opaque)
@@ -1128,7 +1182,7 @@ static int esp_post_load(void *opaque, int version_id)
const VMStateDescription vmstate_esp = {
.name = "esp",
- .version_id = 5,
+ .version_id = 6,
.minimum_version_id = 3,
.post_load = esp_post_load,
.fields = (VMStateField[]) {
@@ -1157,6 +1211,7 @@ const VMStateDescription vmstate_esp = {
VMSTATE_FIFO8_TEST(fifo, ESPState, esp_is_version_5),
VMSTATE_FIFO8_TEST(cmdfifo, ESPState, esp_is_version_5),
VMSTATE_UINT8_TEST(ti_cmd, ESPState, esp_is_version_5),
+ VMSTATE_UINT8_TEST(lun, ESPState, esp_is_version_6),
VMSTATE_END_OF_LIST()
},
};
@@ -1195,7 +1250,6 @@ static void sysbus_esp_pdma_write(void *opaque, hwaddr addr,
{
SysBusESPState *sysbus = opaque;
ESPState *s = ESP(&sysbus->esp);
- uint32_t dmalen;
trace_esp_pdma_write(size);
@@ -1208,10 +1262,7 @@ static void sysbus_esp_pdma_write(void *opaque, hwaddr addr,
esp_pdma_write(s, val);
break;
}
- dmalen = esp_get_tc(s);
- if (dmalen == 0 || fifo8_num_free(&s->fifo) < 2) {
- s->pdma_cb(s);
- }
+ s->pdma_cb(s);
}
static uint64_t sysbus_esp_pdma_read(void *opaque, hwaddr addr,
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 98c30c5d5c..40e039864f 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -147,7 +147,7 @@ static int execute_command(BlockBackend *blk,
return 0;
}
-static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s)
+static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len)
{
uint8_t page, page_idx;
@@ -213,8 +213,13 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s)
r->buf[page_idx] = 0xb0;
}
stw_be_p(r->buf + 2, lduw_be_p(r->buf + 2) + 1);
+
+ if (len < r->buflen) {
+ len++;
+ }
}
}
+ return len;
}
static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s)
@@ -332,7 +337,7 @@ static void scsi_read_complete(void * opaque, int ret)
}
}
if (r->req.cmd.buf[0] == INQUIRY) {
- scsi_handle_inquiry_reply(r, s);
+ len = scsi_handle_inquiry_reply(r, s, len);
}
req_complete:
diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events
index 1a27e141ae..92d5b40f89 100644
--- a/hw/scsi/trace-events
+++ b/hw/scsi/trace-events
@@ -166,7 +166,8 @@ esp_dma_disable(void) "Lower enable"
esp_pdma_read(int size) "pDMA read %u bytes"
esp_pdma_write(int size) "pDMA write %u bytes"
esp_get_cmd(uint32_t dmalen, int target) "len %d target %d"
-esp_do_busid_cmd(uint8_t busid) "busid 0x%x"
+esp_do_command_phase(uint8_t busid) "busid 0x%x"
+esp_do_identify(uint8_t byte) "0x%x"
esp_handle_satn_stop(uint32_t cmdlen) "cmdlen %d"
esp_write_response(uint32_t status) "Transfer status (status=%d)"
esp_do_dma(uint32_t cmdlen, uint32_t len) "command len %d + %d"
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index ccabed4003..039d422bf4 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -59,6 +59,7 @@ ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
ram_addr_t qemu_ram_get_max_length(RAMBlock *rb);
bool qemu_ram_is_shared(RAMBlock *rb);
+bool qemu_ram_is_noreserve(RAMBlock *rb);
bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
void qemu_ram_set_uf_zeroable(RAMBlock *rb);
bool qemu_ram_is_migratable(RAMBlock *rb);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index b114f5454b..b116f7c64e 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -155,6 +155,13 @@ typedef struct IOMMUTLBEvent {
*/
#define RAM_UF_WRITEPROTECT (1 << 6)
+/*
+ * RAM is mmap-ed with MAP_NORESERVE. When set, reserving swap space (or huge
+ * pages if applicable) is skipped: will bail out if not supported. When not
+ * set, the OS will do the reservation, if supported for the memory type.
+ */
+#define RAM_NORESERVE (1 << 7)
+
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
IOMMUNotifierFlag flags,
hwaddr start, hwaddr end,
@@ -940,27 +947,27 @@ void memory_region_init_ram_nomigrate(MemoryRegion *mr,
Error **errp);
/**
- * memory_region_init_ram_shared_nomigrate: Initialize RAM memory region.
- * Accesses into the region will
- * modify memory directly.
+ * memory_region_init_ram_flags_nomigrate: Initialize RAM memory region.
+ * Accesses into the region will
+ * modify memory directly.
*
* @mr: the #MemoryRegion to be initialized.
* @owner: the object that tracks the region's reference count
* @name: Region name, becomes part of RAMBlock name used in migration stream
* must be unique within any device
* @size: size of the region.
- * @share: allow remapping RAM to different addresses
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE.
* @errp: pointer to Error*, to store an error if it happens.
*
- * Note that this function is similar to memory_region_init_ram_nomigrate.
- * The only difference is part of the RAM region can be remapped.
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
*/
-void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr,
- Object *owner,
- const char *name,
- uint64_t size,
- bool share,
- Error **errp);
+void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ uint32_t ram_flags,
+ Error **errp);
/**
* memory_region_init_resizeable_ram: Initialize memory region with resizeable
@@ -1005,10 +1012,8 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
* @size: size of the region.
* @align: alignment of the region base address; if 0, the default alignment
* (getpagesize()) will be used.
- * @ram_flags: Memory region features:
- * - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag
- * - RAM_PMEM: the memory is persistent memory
- * Other bits are ignored now.
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
+ * RAM_NORESERVE,
* @path: the path in which to allocate the RAM.
* @readonly: true to open @path for reading, false for read/write.
* @errp: pointer to Error*, to store an error if it happens.
@@ -1034,7 +1039,8 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
* @owner: the object that tracks the region's reference count
* @name: the name of the region.
* @size: size of the region.
- * @share: %true if memory must be mmaped with the MAP_SHARED flag
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
+ * RAM_NORESERVE.
* @fd: the fd to mmap.
* @offset: offset within the file referenced by fd
* @errp: pointer to Error*, to store an error if it happens.
@@ -1046,7 +1052,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
Object *owner,
const char *name,
uint64_t size,
- bool share,
+ uint32_t ram_flags,
int fd,
ram_addr_t offset,
Error **errp);
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 3cb9791df3..551876bed0 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -104,11 +104,8 @@ long qemu_maxrampagesize(void);
* Parameters:
* @size: the size in bytes of the ram block
* @mr: the memory region where the ram block is
- * @ram_flags: specify the properties of the ram block, which can be one
- * or bit-or of following values
- * - RAM_SHARED: mmap the backing file or device with MAP_SHARED
- * - RAM_PMEM: the backend @mem_path or @fd is persistent memory
- * Other bits are ignored.
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
+ * RAM_NORESERVE.
* @mem_path or @fd: specify the backing file or device
* @readonly: true to open @path for reading, false for read/write.
* @errp: pointer to Error*, to store an error if it happens
@@ -126,7 +123,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
MemoryRegion *mr, Error **errp);
-RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
+RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
void (*resized)(const char*,
diff --git a/include/hw/scsi/esp.h b/include/hw/scsi/esp.h
index aada3680b7..b1ec27612f 100644
--- a/include/hw/scsi/esp.h
+++ b/include/hw/scsi/esp.h
@@ -37,6 +37,7 @@ struct ESPState {
SCSIRequest *current_req;
Fifo8 cmdfifo;
uint8_t cmdfifo_cdb_offset;
+ uint8_t lun;
uint32_t do_cmd;
bool data_in_ready;
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
index 456ff87df1..90d0eee705 100644
--- a/include/qemu/mmap-alloc.h
+++ b/include/qemu/mmap-alloc.h
@@ -7,18 +7,22 @@ size_t qemu_fd_getpagesize(int fd);
size_t qemu_mempath_getpagesize(const char *mem_path);
/**
- * qemu_ram_mmap: mmap the specified file or device.
+ * qemu_ram_mmap: mmap anonymous memory, the specified file or device.
+ *
+ * mmap() abstraction to map guest RAM, simplifying flag handling, taking
+ * care of alignment requirements and installing guard pages.
*
* Parameters:
* @fd: the file or the device to mmap
* @size: the number of bytes to be mmaped
* @align: if not zero, specify the alignment of the starting mapping address;
* otherwise, the alignment in use will be determined by QEMU.
- * @readonly: true for a read-only mapping, false for read/write.
- * @shared: map has RAM_SHARED flag.
- * @is_pmem: map has RAM_PMEM flag.
+ * @qemu_map_flags: QEMU_MAP_* flags
* @map_offset: map starts at offset of map_offset from the start of fd
*
+ * Internally, MAP_PRIVATE, MAP_ANONYMOUS and MAP_SHARED_VALIDATE are set
+ * implicitly based on other parameters.
+ *
* Return:
* On success, return a pointer to the mapped area.
* On failure, return MAP_FAILED.
@@ -26,9 +30,7 @@ size_t qemu_mempath_getpagesize(const char *mem_path);
void *qemu_ram_mmap(int fd,
size_t size,
size_t align,
- bool readonly,
- bool shared,
- bool is_pmem,
+ uint32_t qemu_map_flags,
off_t map_offset);
void qemu_ram_munmap(int fd, void *ptr, size_t size);
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 236a045671..0a54bf7be8 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -195,6 +195,9 @@ extern "C" {
#ifndef MAP_FIXED_NOREPLACE
#define MAP_FIXED_NOREPLACE 0
#endif
+#ifndef MAP_NORESERVE
+#define MAP_NORESERVE 0
+#endif
#ifndef ENOMEDIUM
#define ENOMEDIUM ENODEV
#endif
@@ -362,10 +365,35 @@ extern "C" {
int qemu_daemon(int nochdir, int noclose);
void *qemu_try_memalign(size_t alignment, size_t size);
void *qemu_memalign(size_t alignment, size_t size);
-void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared);
+void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
+ bool noreserve);
void qemu_vfree(void *ptr);
void qemu_anon_ram_free(void *ptr, size_t size);
+/*
+ * Abstraction of PROT_ and MAP_ flags as passed to mmap(), for example,
+ * consumed by qemu_ram_mmap().
+ */
+
+/* Map PROT_READ instead of PROT_READ | PROT_WRITE. */
+#define QEMU_MAP_READONLY (1 << 0)
+
+/* Use MAP_SHARED instead of MAP_PRIVATE. */
+#define QEMU_MAP_SHARED (1 << 1)
+
+/*
+ * Use MAP_SYNC | MAP_SHARED_VALIDATE if supported. Ignored without
+ * QEMU_MAP_SHARED. If mapping fails, warn and fallback to !QEMU_MAP_SYNC.
+ */
+#define QEMU_MAP_SYNC (1 << 2)
+
+/*
+ * Use MAP_NORESERVE to skip reservation of swap space (or huge pages if
+ * applicable). Bail out if not supported/effective.
+ */
+#define QEMU_MAP_NORESERVE (1 << 3)
+
+
#define QEMU_MADV_INVALID -1
#if defined(CONFIG_MADVISE)
@@ -410,7 +438,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#ifdef MADV_REMOVE
#define QEMU_MADV_REMOVE MADV_REMOVE
#else
-#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
+#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED
#endif
#elif defined(CONFIG_POSIX_MADVISE)
@@ -424,7 +452,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
-#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
+#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED
#else /* no-op */
diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
index df5644723a..9ff5c16963 100644
--- a/include/sysemu/hostmem.h
+++ b/include/sysemu/hostmem.h
@@ -64,7 +64,7 @@ struct HostMemoryBackend {
/* protected */
uint64_t size;
bool merge, dump, use_canonical_path;
- bool prealloc, is_mapped, share;
+ bool prealloc, is_mapped, share, reserve;
uint32_t prealloc_threads;
DECLARE_BITMAP(host_nodes, MAX_NODES + 1);
HostMemPolicy policy;
diff --git a/migration/ram.c b/migration/ram.c
index 60ea913c54..723af67c2e 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3343,8 +3343,7 @@ int colo_init_ram_cache(void)
WITH_RCU_READ_LOCK_GUARD() {
RAMBLOCK_FOREACH_NOT_IGNORED(block) {
block->colo_cache = qemu_anon_ram_alloc(block->used_length,
- NULL,
- false);
+ NULL, false, false);
if (!block->colo_cache) {
error_report("%s: Can't alloc memory for COLO cache of block %s,"
"size 0x" RAM_ADDR_FMT, __func__, block->idstr,
diff --git a/qapi/machine.json b/qapi/machine.json
index 58a9c86b36..e4d0f9b24f 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -790,11 +790,19 @@
#
# @size: memory backend size
#
-# @merge: enables or disables memory merge support
+# @merge: whether memory merge support is enabled
#
-# @dump: includes memory backend's memory in a core dump or not
+# @dump: whether memory backend's memory is included in a core dump
#
-# @prealloc: enables or disables memory preallocation
+# @prealloc: whether memory was preallocated
+#
+# @share: whether memory is private to QEMU or shared (since 6.1)
+#
+# @reserve: whether swap space (or huge pages) was reserved if applicable.
+# This corresponds to the user configuration and not the actual
+# behavior implemented in the OS to perform the reservation.
+# For example, Linux will never reserve swap space for shared
+# file mappings. (since 6.1)
#
# @host-nodes: host nodes for its memory policy
#
@@ -809,6 +817,8 @@
'merge': 'bool',
'dump': 'bool',
'prealloc': 'bool',
+ 'share': 'bool',
+ '*reserve': 'bool',
'host-nodes': ['uint16'],
'policy': 'HostMemPolicy' }}
diff --git a/qapi/qom.json b/qapi/qom.json
index f7ef30f940..652be317b8 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -545,6 +545,9 @@
# @share: if false, the memory is private to QEMU; if true, it is shared
# (default: false)
#
+# @reserve: if true, reserve swap space (or huge pages) if applicable
+# (default: true) (since 6.1)
+#
# @size: size of the memory region in bytes
#
# @x-use-canonical-path-for-ramblock-id: if true, the canoncial path is used
@@ -556,6 +559,12 @@
# false generally, but true for machine
# types <= 4.0)
#
+# Note: prealloc=true and reserve=false cannot be set at the same time. With
+# reserve=true, the behavior depends on the operating system: for example,
+# Linux will not reserve swap space for shared file mappings --
+# "not applicable". In contrast, reserve=false will bail out if it cannot
+# be configured accordingly.
+#
# Since: 2.1
##
{ 'struct': 'MemoryBackendProperties',
@@ -566,6 +575,7 @@
'*prealloc': 'bool',
'*prealloc-threads': 'uint32',
'*share': 'bool',
+ '*reserve': 'bool',
'size': 'size',
'*x-use-canonical-path-for-ramblock-id': 'bool' } }
diff --git a/scripts/coccinelle/memory-region-housekeeping.cocci b/scripts/coccinelle/memory-region-housekeeping.cocci
index c768d8140a..29651ebde9 100644
--- a/scripts/coccinelle/memory-region-housekeeping.cocci
+++ b/scripts/coccinelle/memory-region-housekeeping.cocci
@@ -127,8 +127,8 @@ static void device_fn(DeviceState *dev, ...)
- memory_region_init_rom(E1, NULL, E2, E3, E4);
+ memory_region_init_rom(E1, obj, E2, E3, E4);
|
-- memory_region_init_ram_shared_nomigrate(E1, NULL, E2, E3, E4, E5);
-+ memory_region_init_ram_shared_nomigrate(E1, obj, E2, E3, E4, E5);
+- memory_region_init_ram_flags_nomigrate(E1, NULL, E2, E3, E4, E5);
++ memory_region_init_ram_flags_nomigrate(E1, obj, E2, E3, E4, E5);
)
...+>
}
@@ -152,8 +152,8 @@ static void device_fn(DeviceState *dev, ...)
- memory_region_init_rom(E1, NULL, E2, E3, E4);
+ memory_region_init_rom(E1, OBJECT(dev), E2, E3, E4);
|
-- memory_region_init_ram_shared_nomigrate(E1, NULL, E2, E3, E4, E5);
-+ memory_region_init_ram_shared_nomigrate(E1, OBJECT(dev), E2, E3, E4, E5);
+- memory_region_init_ram_flags_nomigrate(E1, NULL, E2, E3, E4, E5);
++ memory_region_init_ram_flags_nomigrate(E1, OBJECT(dev), E2, E3, E4, E5);
)
...+>
}
diff --git a/softmmu/memory.c b/softmmu/memory.c
index c19b0be6b1..f0161515e9 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -1531,22 +1531,22 @@ void memory_region_init_ram_nomigrate(MemoryRegion *mr,
uint64_t size,
Error **errp)
{
- memory_region_init_ram_shared_nomigrate(mr, owner, name, size, false, errp);
+ memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp);
}
-void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr,
- Object *owner,
- const char *name,
- uint64_t size,
- bool share,
- Error **errp)
+void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ uint32_t ram_flags,
+ Error **errp)
{
Error *err = NULL;
memory_region_init(mr, owner, name, size);
mr->ram = true;
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
- mr->ram_block = qemu_ram_alloc(size, share, mr, &err);
+ mr->ram_block = qemu_ram_alloc(size, ram_flags, mr, &err);
if (err) {
mr->size = int128_zero();
object_unparent(OBJECT(mr));
@@ -1609,7 +1609,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
Object *owner,
const char *name,
uint64_t size,
- bool share,
+ uint32_t ram_flags,
int fd,
ram_addr_t offset,
Error **errp)
@@ -1619,9 +1619,8 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
mr->ram = true;
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
- mr->ram_block = qemu_ram_alloc_from_fd(size, mr,
- share ? RAM_SHARED : 0,
- fd, offset, false, &err);
+ mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset,
+ false, &err);
if (err) {
mr->size = int128_zero();
object_unparent(OBJECT(mr));
@@ -1683,7 +1682,7 @@ void memory_region_init_rom_nomigrate(MemoryRegion *mr,
uint64_t size,
Error **errp)
{
- memory_region_init_ram_shared_nomigrate(mr, owner, name, size, false, errp);
+ memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp);
mr->readonly = true;
}
@@ -1703,7 +1702,7 @@ void memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
mr->terminates = true;
mr->rom_device = true;
mr->destructor = memory_region_destructor_ram;
- mr->ram_block = qemu_ram_alloc(size, false, mr, &err);
+ mr->ram_block = qemu_ram_alloc(size, 0, mr, &err);
if (err) {
mr->size = int128_zero();
object_unparent(OBJECT(mr));
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 1c8717684a..9b171c9dbe 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -1540,6 +1540,7 @@ static void *file_ram_alloc(RAMBlock *block,
off_t offset,
Error **errp)
{
+ uint32_t qemu_map_flags;
void *area;
block->page_size = qemu_fd_getpagesize(fd);
@@ -1587,9 +1588,11 @@ static void *file_ram_alloc(RAMBlock *block,
perror("ftruncate");
}
- area = qemu_ram_mmap(fd, memory, block->mr->align, readonly,
- block->flags & RAM_SHARED, block->flags & RAM_PMEM,
- offset);
+ qemu_map_flags = readonly ? QEMU_MAP_READONLY : 0;
+ qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0;
+ qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0;
+ qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0;
+ area = qemu_ram_mmap(fd, memory, block->mr->align, qemu_map_flags, offset);
if (area == MAP_FAILED) {
error_setg_errno(errp, errno,
"unable to map backing store for guest RAM");
@@ -1714,6 +1717,11 @@ bool qemu_ram_is_shared(RAMBlock *rb)
return rb->flags & RAM_SHARED;
}
+bool qemu_ram_is_noreserve(RAMBlock *rb)
+{
+ return rb->flags & RAM_NORESERVE;
+}
+
/* Note: Only set at the start of postcopy */
bool qemu_ram_is_uf_zeroable(RAMBlock *rb)
{
@@ -1946,8 +1954,10 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
}
}
-static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
+static void ram_block_add(RAMBlock *new_block, Error **errp)
{
+ const bool noreserve = qemu_ram_is_noreserve(new_block);
+ const bool shared = qemu_ram_is_shared(new_block);
RAMBlock *block;
RAMBlock *last_block = NULL;
ram_addr_t old_ram_size, new_ram_size;
@@ -1970,7 +1980,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
} else {
new_block->host = qemu_anon_ram_alloc(new_block->max_length,
&new_block->mr->align,
- shared);
+ shared, noreserve);
if (!new_block->host) {
error_setg_errno(errp, errno,
"cannot set up guest memory '%s'",
@@ -2042,7 +2052,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
int64_t file_size, file_align;
/* Just support these ram flags by now. */
- assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
+ assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE)) == 0);
if (xen_enabled()) {
error_setg(errp, "-mem-path not supported with Xen");
@@ -2084,7 +2094,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
return NULL;
}
- ram_block_add(new_block, &local_err, ram_flags & RAM_SHARED);
+ ram_block_add(new_block, &local_err);
if (local_err) {
g_free(new_block);
error_propagate(errp, local_err);
@@ -2127,12 +2137,17 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
void (*resized)(const char*,
uint64_t length,
void *host),
- void *host, bool resizeable, bool share,
+ void *host, uint32_t ram_flags,
MemoryRegion *mr, Error **errp)
{
RAMBlock *new_block;
Error *local_err = NULL;
+ assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC)) == 0);
+ assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
+ RAM_NORESERVE)) == 0);
+ assert(!host ^ (ram_flags & RAM_PREALLOC));
+
size = HOST_PAGE_ALIGN(size);
max_size = HOST_PAGE_ALIGN(max_size);
new_block = g_malloc0(sizeof(*new_block));
@@ -2144,13 +2159,8 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
new_block->fd = -1;
new_block->page_size = qemu_real_host_page_size;
new_block->host = host;
- if (host) {
- new_block->flags |= RAM_PREALLOC;
- }
- if (resizeable) {
- new_block->flags |= RAM_RESIZEABLE;
- }
- ram_block_add(new_block, &local_err, share);
+ new_block->flags = ram_flags;
+ ram_block_add(new_block, &local_err);
if (local_err) {
g_free(new_block);
error_propagate(errp, local_err);
@@ -2162,15 +2172,15 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
MemoryRegion *mr, Error **errp)
{
- return qemu_ram_alloc_internal(size, size, NULL, host, false,
- false, mr, errp);
+ return qemu_ram_alloc_internal(size, size, NULL, host, RAM_PREALLOC, mr,
+ errp);
}
-RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share,
+RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
MemoryRegion *mr, Error **errp)
{
- return qemu_ram_alloc_internal(size, size, NULL, NULL, false,
- share, mr, errp);
+ assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
+ return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
}
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
@@ -2179,8 +2189,8 @@ RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
void *host),
MemoryRegion *mr, Error **errp)
{
- return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true,
- false, mr, errp);
+ return qemu_ram_alloc_internal(size, maxsz, resized, NULL,
+ RAM_RESIZEABLE, mr, errp);
}
static void reclaim_ramblock(RAMBlock *block)
@@ -2239,13 +2249,14 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
abort();
} else {
flags = MAP_FIXED;
+ flags |= block->flags & RAM_SHARED ?
+ MAP_SHARED : MAP_PRIVATE;
+ flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
if (block->fd >= 0) {
- flags |= (block->flags & RAM_SHARED ?
- MAP_SHARED : MAP_PRIVATE);
area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
flags, block->fd, offset);
} else {
- flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+ flags |= MAP_ANONYMOUS;
area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
flags, -1, 0);
}
@@ -3523,6 +3534,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
/* The logic here is messy;
* madvise DONTNEED fails for hugepages
* fallocate works on hugepages and shmem
+ * shared anonymous memory requires madvise REMOVE
*/
need_madvise = (rb->page_size == qemu_host_page_size);
need_fallocate = rb->fd != -1;
@@ -3556,7 +3568,11 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
* fallocate'd away).
*/
#if defined(CONFIG_MADVISE)
- ret = madvise(host_startaddr, length, MADV_DONTNEED);
+ if (qemu_ram_is_shared(rb) && rb->fd < 0) {
+ ret = madvise(host_startaddr, length, QEMU_MADV_REMOVE);
+ } else {
+ ret = madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
+ }
if (ret) {
ret = -errno;
error_report("ram_block_discard_range: Failed to discard range "
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 15640572c0..10d9b7365a 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -747,7 +747,7 @@ static void qemu_run_exit_notifiers(void)
void qemu_init_subsystems(void)
{
- Error *err;
+ Error *err = NULL;
os_set_line_buffering();
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 326c1e9080..feb4d201f3 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2522,7 +2522,7 @@ static void qemu_process_help_options(void)
static void qemu_maybe_daemonize(const char *pid_file)
{
- Error *err;
+ Error *err = NULL;
os_daemonize();
rcu_disable_atfork();
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index ac3abea97c..1e11071d81 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -224,6 +224,8 @@ typedef enum X86Seg {
#define CR0_NE_MASK (1U << 5)
#define CR0_WP_MASK (1U << 16)
#define CR0_AM_MASK (1U << 18)
+#define CR0_NW_MASK (1U << 29)
+#define CR0_CD_MASK (1U << 30)
#define CR0_PG_MASK (1U << 31)
#define CR4_VME_MASK (1U << 0)
@@ -2149,9 +2151,13 @@ static inline void
cpu_svm_check_intercept_param(CPUX86State *env1, uint32_t type,
uint64_t param, uintptr_t retaddr)
{ /* no-op */ }
+static inline bool
+cpu_svm_has_intercept(CPUX86State *env, uint32_t type)
+{ return false; }
#else
void cpu_svm_check_intercept_param(CPUX86State *env1, uint32_t type,
uint64_t param, uintptr_t retaddr);
+bool cpu_svm_has_intercept(CPUX86State *env, uint32_t type);
#endif
/* apic.c */
diff --git a/target/i386/svm.h b/target/i386/svm.h
index 87965e5bc2..5098733053 100644
--- a/target/i386/svm.h
+++ b/target/i386/svm.h
@@ -135,6 +135,8 @@
#define SVM_NPTEXIT_GPA (1ULL << 32)
#define SVM_NPTEXIT_GPT (1ULL << 33)
+#define SVM_CR0_RESERVED_MASK 0xffffffff00000000U
+
struct QEMU_PACKED vmcb_control_area {
uint16_t intercept_cr_read;
uint16_t intercept_cr_write;
diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index 0cef2f1a4c..db0d8a9d79 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -84,6 +84,15 @@ void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
{
switch (reg) {
case 0:
+ /*
+ * If we reach this point, the CR0 write intercept is disabled.
+ * But we could still exit if the hypervisor has requested the selective
+ * intercept for bits other than TS and MP
+ */
+ if (cpu_svm_has_intercept(env, SVM_EXIT_CR0_SEL_WRITE) &&
+ ((env->cr[0] ^ t0) & ~(CR0_TS_MASK | CR0_MP_MASK))) {
+ cpu_vmexit(env, SVM_EXIT_CR0_SEL_WRITE, 0, GETPC());
+ }
cpu_x86_update_cr0(env, t0);
break;
case 3:
diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c
index 9d671297cf..1c2dbc1862 100644
--- a/target/i386/tcg/sysemu/svm_helper.c
+++ b/target/i386/tcg/sysemu/svm_helper.c
@@ -72,6 +72,8 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
uint64_t nested_ctl;
uint32_t event_inj;
uint32_t int_ctl;
+ uint32_t asid;
+ uint64_t new_cr0;
cpu_svm_check_intercept_param(env, SVM_EXIT_VMRUN, 0, GETPC());
@@ -154,9 +156,18 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
nested_ctl = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb,
control.nested_ctl));
+ asid = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb,
+ control.asid));
env->nested_pg_mode = 0;
+ if (!cpu_svm_has_intercept(env, SVM_EXIT_VMRUN)) {
+ cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
+ }
+ if (asid == 0) {
+ cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
+ }
+
if (nested_ctl & SVM_NPT_ENABLED) {
env->nested_cr3 = x86_ldq_phys(cs,
env->vm_vmcb + offsetof(struct vmcb,
@@ -182,13 +193,18 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
env->idt.limit = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb,
save.idtr.limit));
+ new_cr0 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr0));
+ if (new_cr0 & SVM_CR0_RESERVED_MASK) {
+ cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
+ }
+ if ((new_cr0 & CR0_NW_MASK) && !(new_cr0 & CR0_CD_MASK)) {
+ cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
+ }
/* clear exit_info_2 so we behave like the real hardware */
x86_stq_phys(cs,
env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
- cpu_x86_update_cr0(env, x86_ldq_phys(cs,
- env->vm_vmcb + offsetof(struct vmcb,
- save.cr0)));
+ cpu_x86_update_cr0(env, new_cr0);
cpu_x86_update_cr4(env, x86_ldq_phys(cs,
env->vm_vmcb + offsetof(struct vmcb,
save.cr4)));
@@ -412,80 +428,91 @@ void helper_clgi(CPUX86State *env)
env->hflags2 &= ~HF2_GIF_MASK;
}
-void cpu_svm_check_intercept_param(CPUX86State *env, uint32_t type,
- uint64_t param, uintptr_t retaddr)
+bool cpu_svm_has_intercept(CPUX86State *env, uint32_t type)
{
- CPUState *cs = env_cpu(env);
-
- if (likely(!(env->hflags & HF_GUEST_MASK))) {
- return;
- }
switch (type) {
case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR0 + 8:
if (env->intercept_cr_read & (1 << (type - SVM_EXIT_READ_CR0))) {
- cpu_vmexit(env, type, param, retaddr);
+ return true;
}
break;
case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR0 + 8:
if (env->intercept_cr_write & (1 << (type - SVM_EXIT_WRITE_CR0))) {
- cpu_vmexit(env, type, param, retaddr);
+ return true;
}
break;
case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR0 + 7:
if (env->intercept_dr_read & (1 << (type - SVM_EXIT_READ_DR0))) {
- cpu_vmexit(env, type, param, retaddr);
+ return true;
}
break;
case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR0 + 7:
if (env->intercept_dr_write & (1 << (type - SVM_EXIT_WRITE_DR0))) {
- cpu_vmexit(env, type, param, retaddr);
+ return true;
}
break;
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 31:
if (env->intercept_exceptions & (1 << (type - SVM_EXIT_EXCP_BASE))) {
- cpu_vmexit(env, type, param, retaddr);
- }
- break;
- case SVM_EXIT_MSR:
- if (env->intercept & (1ULL << (SVM_EXIT_MSR - SVM_EXIT_INTR))) {
- /* FIXME: this should be read in at vmrun (faster this way?) */
- uint64_t addr = x86_ldq_phys(cs, env->vm_vmcb +
- offsetof(struct vmcb,
- control.msrpm_base_pa));
- uint32_t t0, t1;
-
- switch ((uint32_t)env->regs[R_ECX]) {
- case 0 ... 0x1fff:
- t0 = (env->regs[R_ECX] * 2) % 8;
- t1 = (env->regs[R_ECX] * 2) / 8;
- break;
- case 0xc0000000 ... 0xc0001fff:
- t0 = (8192 + env->regs[R_ECX] - 0xc0000000) * 2;
- t1 = (t0 / 8);
- t0 %= 8;
- break;
- case 0xc0010000 ... 0xc0011fff:
- t0 = (16384 + env->regs[R_ECX] - 0xc0010000) * 2;
- t1 = (t0 / 8);
- t0 %= 8;
- break;
- default:
- cpu_vmexit(env, type, param, retaddr);
- t0 = 0;
- t1 = 0;
- break;
- }
- if (x86_ldub_phys(cs, addr + t1) & ((1 << param) << t0)) {
- cpu_vmexit(env, type, param, retaddr);
- }
+ return true;
}
break;
default:
if (env->intercept & (1ULL << (type - SVM_EXIT_INTR))) {
- cpu_vmexit(env, type, param, retaddr);
+ return true;
}
break;
}
+ return false;
+}
+
+void cpu_svm_check_intercept_param(CPUX86State *env, uint32_t type,
+ uint64_t param, uintptr_t retaddr)
+{
+ CPUState *cs = env_cpu(env);
+
+ if (likely(!(env->hflags & HF_GUEST_MASK))) {
+ return;
+ }
+
+ if (!cpu_svm_has_intercept(env, type)) {
+ return;
+ }
+
+ if (type == SVM_EXIT_MSR) {
+ /* FIXME: this should be read in at vmrun (faster this way?) */
+ uint64_t addr = x86_ldq_phys(cs, env->vm_vmcb +
+ offsetof(struct vmcb,
+ control.msrpm_base_pa));
+ uint32_t t0, t1;
+
+ switch ((uint32_t)env->regs[R_ECX]) {
+ case 0 ... 0x1fff:
+ t0 = (env->regs[R_ECX] * 2) % 8;
+ t1 = (env->regs[R_ECX] * 2) / 8;
+ break;
+ case 0xc0000000 ... 0xc0001fff:
+ t0 = (8192 + env->regs[R_ECX] - 0xc0000000) * 2;
+ t1 = (t0 / 8);
+ t0 %= 8;
+ break;
+ case 0xc0010000 ... 0xc0011fff:
+ t0 = (16384 + env->regs[R_ECX] - 0xc0010000) * 2;
+ t1 = (t0 / 8);
+ t0 %= 8;
+ break;
+ default:
+ cpu_vmexit(env, type, param, retaddr);
+ t0 = 0;
+ t1 = 0;
+ break;
+ }
+ if (x86_ldub_phys(cs, addr + t1) & ((1 << param) << t0)) {
+ cpu_vmexit(env, type, param, retaddr);
+ }
+ return;
+ }
+
+ cpu_vmexit(env, type, param, retaddr);
}
void helper_svm_check_intercept(CPUX86State *env, uint32_t type)
diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
index df7dc08e9f..47fdae5b21 100644
--- a/ui/vnc-auth-sasl.c
+++ b/ui/vnc-auth-sasl.c
@@ -28,10 +28,30 @@
#include "vnc.h"
#include "trace.h"
+/*
+ * Apple has deprecated sasl.h functions in OS X 10.11. Therefore,
+ * files that use SASL API need to disable -Wdeprecated-declarations.
+ */
+#ifdef CONFIG_DARWIN
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+
/* Max amount of data we send/recv for SASL steps to prevent DOS */
#define SASL_DATA_MAX_LEN (1024 * 1024)
+bool vnc_sasl_server_init(Error **errp)
+{
+ int saslErr = sasl_server_init(NULL, "qemu");
+
+ if (saslErr != SASL_OK) {
+ error_setg(errp, "Failed to initialize SASL auth: %s",
+ sasl_errstring(saslErr, NULL, NULL));
+ return false;
+ }
+ return true;
+}
+
void vnc_sasl_client_cleanup(VncState *vs)
{
if (vs->sasl.conn) {
diff --git a/ui/vnc-auth-sasl.h b/ui/vnc-auth-sasl.h
index 1bfb86c6f5..367b8672cc 100644
--- a/ui/vnc-auth-sasl.h
+++ b/ui/vnc-auth-sasl.h
@@ -63,6 +63,7 @@ struct VncDisplaySASL {
char *authzid;
};
+bool vnc_sasl_server_init(Error **errp);
void vnc_sasl_client_cleanup(VncState *vs);
size_t vnc_client_read_sasl(VncState *vs);
diff --git a/ui/vnc.c b/ui/vnc.c
index b3d4d7b9a5..f0a1550d58 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -4154,14 +4154,8 @@ void vnc_display_open(const char *id, Error **errp)
trace_vnc_auth_init(vd, 1, vd->ws_auth, vd->ws_subauth);
#ifdef CONFIG_VNC_SASL
- if (sasl) {
- int saslErr = sasl_server_init(NULL, "qemu");
-
- if (saslErr != SASL_OK) {
- error_setg(errp, "Failed to initialize SASL auth: %s",
- sasl_errstring(saslErr, NULL, NULL));
- goto fail;
- }
+ if (sasl && !vnc_sasl_server_init(errp)) {
+ goto fail;
}
#endif
vd->lock_key_sync = lock_key_sync;
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index e6fa8b598b..838e286ce5 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -20,6 +20,8 @@
#include "qemu/osdep.h"
#include "qemu/mmap-alloc.h"
#include "qemu/host-utils.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
#define HUGETLBFS_MAGIC 0x958458f6
@@ -82,32 +84,81 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
return qemu_real_host_page_size;
}
-void *qemu_ram_mmap(int fd,
- size_t size,
- size_t align,
- bool readonly,
- bool shared,
- bool is_pmem,
- off_t map_offset)
+#define OVERCOMMIT_MEMORY_PATH "/proc/sys/vm/overcommit_memory"
+static bool map_noreserve_effective(int fd, uint32_t qemu_map_flags)
{
- int prot;
- int flags;
- int map_sync_flags = 0;
- int guardfd;
- size_t offset;
- size_t pagesize;
- size_t total;
- void *guardptr;
- void *ptr;
+#if defined(__linux__)
+ const bool readonly = qemu_map_flags & QEMU_MAP_READONLY;
+ const bool shared = qemu_map_flags & QEMU_MAP_SHARED;
+ gchar *content = NULL;
+ const char *endptr;
+ unsigned int tmp;
/*
- * Note: this always allocates at least one extra page of virtual address
- * space, even if size is already aligned.
+ * hugeltb accounting is different than ordinary swap reservation:
+ * a) Hugetlb pages from the pool are reserved for both private and
+ * shared mappings. For shared mappings, all mappers have to specify
+ * MAP_NORESERVE.
+ * b) MAP_NORESERVE is not affected by /proc/sys/vm/overcommit_memory.
*/
- total = size + align;
+ if (qemu_fd_getpagesize(fd) != qemu_real_host_page_size) {
+ return true;
+ }
+
+ /*
+ * Accountable mappings in the kernel that can be affected by MAP_NORESEVE
+ * are private writable mappings (see mm/mmap.c:accountable_mapping() in
+ * Linux). For all shared or readonly mappings, MAP_NORESERVE is always
+ * implicitly active -- no reservation; this includes shmem. The only
+ * exception is shared anonymous memory, it is accounted like private
+ * anonymous memory.
+ */
+ if (readonly || (shared && fd >= 0)) {
+ return true;
+ }
+
+ /*
+ * MAP_NORESERVE is globally ignored for applicable !hugetlb mappings when
+ * memory overcommit is set to "never". Sparse memory regions aren't really
+ * possible in this system configuration.
+ *
+ * Bail out now instead of silently committing way more memory than
+ * currently desired by the user.
+ */
+ if (g_file_get_contents(OVERCOMMIT_MEMORY_PATH, &content, NULL, NULL) &&
+ !qemu_strtoui(content, &endptr, 0, &tmp) &&
+ (!endptr || *endptr == '\n')) {
+ if (tmp == 2) {
+ error_report("Skipping reservation of swap space is not supported:"
+ " \"" OVERCOMMIT_MEMORY_PATH "\" is \"2\"");
+ return false;
+ }
+ return true;
+ }
+ /* this interface has been around since Linux 2.6 */
+ error_report("Skipping reservation of swap space is not supported:"
+ " Could not read: \"" OVERCOMMIT_MEMORY_PATH "\"");
+ return false;
+#endif
+ /*
+ * E.g., FreeBSD used to define MAP_NORESERVE, never implemented it,
+ * and removed it a while ago.
+ */
+ error_report("Skipping reservation of swap space is not supported");
+ return false;
+}
+
+/*
+ * Reserve a new memory region of the requested size to be used for mapping
+ * from the given fd (if any).
+ */
+static void *mmap_reserve(size_t size, int fd)
+{
+ int flags = MAP_PRIVATE;
#if defined(__powerpc64__) && defined(__linux__)
- /* On ppc64 mappings in the same segment (aka slice) must share the same
+ /*
+ * On ppc64 mappings in the same segment (aka slice) must share the same
* page size. Since we will be re-allocating part of this segment
* from the supplied fd, we should make sure to use the same page size, to
* this end we mmap the supplied fd. In this case, set MAP_NORESERVE to
@@ -115,52 +166,55 @@ void *qemu_ram_mmap(int fd,
* We do this unless we are using the system page size, in which case
* anonymous memory is OK.
*/
- flags = MAP_PRIVATE;
- pagesize = qemu_fd_getpagesize(fd);
- if (fd == -1 || pagesize == qemu_real_host_page_size) {
- guardfd = -1;
+ if (fd == -1 || qemu_fd_getpagesize(fd) == qemu_real_host_page_size) {
+ fd = -1;
flags |= MAP_ANONYMOUS;
} else {
- guardfd = fd;
flags |= MAP_NORESERVE;
}
#else
- guardfd = -1;
- pagesize = qemu_real_host_page_size;
- flags = MAP_PRIVATE | MAP_ANONYMOUS;
+ fd = -1;
+ flags |= MAP_ANONYMOUS;
#endif
- guardptr = mmap(0, total, PROT_NONE, flags, guardfd, 0);
+ return mmap(0, size, PROT_NONE, flags, fd, 0);
+}
- if (guardptr == MAP_FAILED) {
+/*
+ * Activate memory in a reserved region from the given fd (if any), to make
+ * it accessible.
+ */
+static void *mmap_activate(void *ptr, size_t size, int fd,
+ uint32_t qemu_map_flags, off_t map_offset)
+{
+ const bool noreserve = qemu_map_flags & QEMU_MAP_NORESERVE;
+ const bool readonly = qemu_map_flags & QEMU_MAP_READONLY;
+ const bool shared = qemu_map_flags & QEMU_MAP_SHARED;
+ const bool sync = qemu_map_flags & QEMU_MAP_SYNC;
+ const int prot = PROT_READ | (readonly ? 0 : PROT_WRITE);
+ int map_sync_flags = 0;
+ int flags = MAP_FIXED;
+ void *activated_ptr;
+
+ if (noreserve && !map_noreserve_effective(fd, qemu_map_flags)) {
return MAP_FAILED;
}
- assert(is_power_of_2(align));
- /* Always align to host page size */
- assert(align >= pagesize);
-
- flags = MAP_FIXED;
flags |= fd == -1 ? MAP_ANONYMOUS : 0;
flags |= shared ? MAP_SHARED : MAP_PRIVATE;
- if (shared && is_pmem) {
+ flags |= noreserve ? MAP_NORESERVE : 0;
+ if (shared && sync) {
map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
}
- offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
-
- prot = PROT_READ | (readonly ? 0 : PROT_WRITE);
-
- ptr = mmap(guardptr + offset, size, prot,
- flags | map_sync_flags, fd, map_offset);
-
- if (ptr == MAP_FAILED && map_sync_flags) {
+ activated_ptr = mmap(ptr, size, prot, flags | map_sync_flags, fd,
+ map_offset);
+ if (activated_ptr == MAP_FAILED && map_sync_flags) {
if (errno == ENOTSUP) {
- char *proc_link, *file_name;
- int len;
- proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
- file_name = g_malloc0(PATH_MAX);
- len = readlink(proc_link, file_name, PATH_MAX - 1);
+ char *proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
+ char *file_name = g_malloc0(PATH_MAX);
+ int len = readlink(proc_link, file_name, PATH_MAX - 1);
+
if (len < 0) {
len = 0;
}
@@ -173,12 +227,53 @@ void *qemu_ram_mmap(int fd,
g_free(file_name);
}
/*
- * if map failed with MAP_SHARED_VALIDATE | MAP_SYNC,
- * we will remove these flags to handle compatibility.
+ * If mmap failed with MAP_SHARED_VALIDATE | MAP_SYNC, we will try
+ * again without these flags to handle backwards compatibility.
*/
- ptr = mmap(guardptr + offset, size, prot, flags, fd, map_offset);
+ activated_ptr = mmap(ptr, size, prot, flags, fd, map_offset);
}
+ return activated_ptr;
+}
+static inline size_t mmap_guard_pagesize(int fd)
+{
+#if defined(__powerpc64__) && defined(__linux__)
+ /* Mappings in the same segment must share the same page size */
+ return qemu_fd_getpagesize(fd);
+#else
+ return qemu_real_host_page_size;
+#endif
+}
+
+void *qemu_ram_mmap(int fd,
+ size_t size,
+ size_t align,
+ uint32_t qemu_map_flags,
+ off_t map_offset)
+{
+ const size_t guard_pagesize = mmap_guard_pagesize(fd);
+ size_t offset, total;
+ void *ptr, *guardptr;
+
+ /*
+ * Note: this always allocates at least one extra page of virtual address
+ * space, even if size is already aligned.
+ */
+ total = size + align;
+
+ guardptr = mmap_reserve(total, fd);
+ if (guardptr == MAP_FAILED) {
+ return MAP_FAILED;
+ }
+
+ assert(is_power_of_2(align));
+ /* Always align to host page size */
+ assert(align >= guard_pagesize);
+
+ offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
+
+ ptr = mmap_activate(guardptr + offset, size, fd, qemu_map_flags,
+ map_offset);
if (ptr == MAP_FAILED) {
munmap(guardptr, total);
return MAP_FAILED;
@@ -193,8 +288,8 @@ void *qemu_ram_mmap(int fd,
* a guard page guarding against potential buffer overflows.
*/
total -= offset;
- if (total > size + pagesize) {
- munmap(ptr + size + pagesize, total - size - pagesize);
+ if (total > size + guard_pagesize) {
+ munmap(ptr + size + guard_pagesize, total - size - guard_pagesize);
}
return ptr;
@@ -202,15 +297,8 @@ void *qemu_ram_mmap(int fd,
void qemu_ram_munmap(int fd, void *ptr, size_t size)
{
- size_t pagesize;
-
if (ptr) {
/* Unmap both the RAM block and the guard page */
-#if defined(__powerpc64__) && defined(__linux__)
- pagesize = qemu_fd_getpagesize(fd);
-#else
- pagesize = qemu_real_host_page_size;
-#endif
- munmap(ptr, size + pagesize);
+ munmap(ptr, size + mmap_guard_pagesize(fd));
}
}
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 7b4bec1402..e8bdb02e1d 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -227,10 +227,13 @@ void *qemu_memalign(size_t alignment, size_t size)
}
/* alloc shared memory pages */
-void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
+void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
+ bool noreserve)
{
+ const uint32_t qemu_map_flags = (shared ? QEMU_MAP_SHARED : 0) |
+ (noreserve ? QEMU_MAP_NORESERVE : 0);
size_t align = QEMU_VMALLOC_ALIGN;
- void *ptr = qemu_ram_mmap(-1, size, align, false, shared, false, 0);
+ void *ptr = qemu_ram_mmap(-1, size, align, qemu_map_flags, 0);
if (ptr == MAP_FAILED) {
return NULL;
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index ca99356fdf..ee3a3692d8 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -38,6 +38,7 @@
#include "trace.h"
#include "qemu/sockets.h"
#include "qemu/cutils.h"
+#include "qemu/error-report.h"
#include <malloc.h>
/* this must come after including "trace.h" */
@@ -76,10 +77,20 @@ static int get_allocation_granularity(void)
return system_info.dwAllocationGranularity;
}
-void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared)
+void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
+ bool noreserve)
{
void *ptr;
+ if (noreserve) {
+ /*
+ * We need a MEM_COMMIT before accessing any memory in a MEM_RESERVE
+ * area; we cannot easily mimic POSIX MAP_NORESERVE semantics.
+ */
+ error_report("Skipping reservation of swap space is not supported.");
+ return NULL;
+ }
+
ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
trace_qemu_anon_ram_alloc(size, ptr);
diff --git a/util/qemu-config.c b/util/qemu-config.c
index 374f3bc460..84ee6dc4ea 100644
--- a/util/qemu-config.c
+++ b/util/qemu-config.c
@@ -429,29 +429,14 @@ out:
void qemu_config_do_parse(const char *group, QDict *qdict, void *opaque, Error **errp)
{
QemuOptsList **lists = opaque;
- const char *id = qdict_get_try_str(qdict, "id");
QemuOptsList *list;
- QemuOpts *opts;
- const QDictEntry *unrecognized;
list = find_list(lists, group, errp);
if (!list) {
return;
}
- opts = qemu_opts_create(list, id, 1, errp);
- if (!opts) {
- return;
- }
- if (!qemu_opts_absorb_qdict(opts, qdict, errp)) {
- qemu_opts_del(opts);
- return;
- }
- unrecognized = qdict_first(qdict);
- if (unrecognized) {
- error_setg(errp, QERR_INVALID_PARAMETER, unrecognized->key);
- qemu_opts_del(opts);
- }
+ qemu_opts_from_qdict(list, qdict, errp);
}
int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname, Error **errp)