From c3543fb5fe4520f03dd4fef04fab7745eeca1c96 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 7 Nov 2014 13:22:32 +0100 Subject: esp-pci: fixup deadlock with linux A linux guest will be issuing messages: [ 32.124042] DC390: Deadlock in DataIn_0: DMA aborted unfinished: 000000 bytes remain!! [ 32.126348] DC390: DataIn_0: DMA State: 0 and the HBA will fail to work properly. Reason is the emulation is not setting the 'DMA transfer done' status correctly. Signed-off-by: Hannes Reinecke Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi/esp-pci.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'hw') diff --git a/hw/scsi/esp-pci.c b/hw/scsi/esp-pci.c index 82795e68b8..77b8647fca 100644 --- a/hw/scsi/esp-pci.c +++ b/hw/scsi/esp-pci.c @@ -268,6 +268,8 @@ static void esp_pci_dma_memory_rw(PCIESPState *pci, uint8_t *buf, int len, /* update status registers */ pci->dma_regs[DMA_WBC] -= len; pci->dma_regs[DMA_WAC] += len; + if (pci->dma_regs[DMA_WBC] == 0) + pci->dma_regs[DMA_STAT] |= DMA_STAT_DONE; } static void esp_pci_dma_memory_read(void *opaque, uint8_t *buf, int len) -- cgit v1.2.3 From 55783a5521a3b1f93ee6a072e414a27c6cfa15f0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 7 Nov 2014 14:00:02 +0100 Subject: virtio-scsi: work around bug in old BIOSes Old BIOSes left some padding by mistake after the req_size/resp_size. New QEMU does not like it, thinking it is a bidirectional command. As a workaround, we can check if the ANY_LAYOUT bit is set; if not, we always consider the first buffer as the virtio-scsi request/response, because, back when QEMU did not support ANY_LAYOUT, it expected the payload to start at the second element of the iovec. This can show up during migration. Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi/virtio-scsi.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'hw') diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index fdcacfd79a..ef485508b1 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -118,6 +118,7 @@ static size_t qemu_sgl_concat(VirtIOSCSIReq *req, struct iovec *iov, static int virtio_scsi_parse_req(VirtIOSCSIReq *req, unsigned req_size, unsigned resp_size) { + VirtIODevice *vdev = (VirtIODevice *) req->dev; size_t in_size, out_size; if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0, @@ -130,8 +131,24 @@ static int virtio_scsi_parse_req(VirtIOSCSIReq *req, resp_size) < resp_size) { return -EINVAL; } + req->resp_size = resp_size; + /* Old BIOSes left some padding by mistake after the req_size/resp_size. + * As a workaround, always consider the first buffer as the virtio-scsi + * request/response, making the payload start at the second element + * of the iovec. + * + * The actual length of the response header, stored in req->resp_size, + * does not change. + * + * TODO: always disable this workaround for virtio 1.0 devices. + */ + if ((vdev->guest_features & VIRTIO_F_ANY_LAYOUT) == 0) { + req_size = req->elem.out_sg[0].iov_len; + resp_size = req->elem.in_sg[0].iov_len; + } + out_size = qemu_sgl_concat(req, req->elem.out_sg, &req->elem.out_addr[0], req->elem.out_num, req_size); -- cgit v1.2.3 From 25aaa2c568a11bd79b7b83c857278232f6fa7be6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 10 Nov 2014 13:58:14 +0100 Subject: esp: fix coding standards Reported-by: Mark Cave-Ayland Signed-off-by: Paolo Bonzini --- hw/scsi/esp-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/scsi/esp-pci.c b/hw/scsi/esp-pci.c index 77b8647fca..00b7297354 100644 --- a/hw/scsi/esp-pci.c +++ b/hw/scsi/esp-pci.c @@ -268,8 +268,9 @@ static void esp_pci_dma_memory_rw(PCIESPState *pci, uint8_t *buf, int len, /* update status registers */ pci->dma_regs[DMA_WBC] -= len; pci->dma_regs[DMA_WAC] += len; - if (pci->dma_regs[DMA_WBC] == 0) + if (pci->dma_regs[DMA_WBC] == 0) { pci->dma_regs[DMA_STAT] |= DMA_STAT_DONE; + } } static void esp_pci_dma_memory_read(void *opaque, uint8_t *buf, int len) -- cgit v1.2.3 From ed4b43265d4d0c7ecfbbcb4001f61700756f22b9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 11 Nov 2014 09:17:09 +0800 Subject: virtio-scsi: dataplane: fix allocation for 'cmd_vrings' The size of each element should be sizeof(VirtIOSCSIVring *). Signed-off-by: Ming Lei Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- hw/scsi/virtio-scsi-dataplane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 9651e6f274..969b931557 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -230,7 +230,7 @@ void virtio_scsi_dataplane_start(VirtIOSCSI *s) if (!s->event_vring) { goto fail_vrings; } - s->cmd_vrings = g_malloc0(sizeof(VirtIOSCSIVring) * vs->conf.num_queues); + s->cmd_vrings = g_new(VirtIOSCSIVring *, vs->conf.num_queues); for (i = 0; i < vs->conf.num_queues; i++) { s->cmd_vrings[i] = virtio_scsi_vring_init(s, vs->cmd_vqs[i], -- cgit v1.2.3 From c9cf45c1a475e594c560862d9df35b16e3a42702 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 10 Nov 2014 16:52:55 +0100 Subject: esp: Do not overwrite ESP_TCHI after reset After a reset ESP_TCHI should contain the unique ID of the chip. This value will be overwritten with the current tranfer count if the transfer count has previously been set. So we should always return the chip id if ESP_TCHI has never been written to. Signed-off-by: Hannes Reinecke Signed-off-by: Paolo Bonzini --- hw/scsi/esp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c index 5ab44d860b..272d13d633 100644 --- a/hw/scsi/esp.c +++ b/hw/scsi/esp.c @@ -364,7 +364,7 @@ void esp_hard_reset(ESPState *s) { memset(s->rregs, 0, ESP_REGS); memset(s->wregs, 0, ESP_REGS); - s->rregs[ESP_TCHI] = s->chip_id; + s->tchi_written = 0; s->ti_size = 0; s->ti_rptr = 0; s->ti_wptr = 0; @@ -422,6 +422,11 @@ uint64_t esp_reg_read(ESPState *s, uint32_t saddr) esp_lower_irq(s); return old_val; + case ESP_TCHI: + /* Return the unique id if the value has never been written */ + if (!s->tchi_written) { + return s->chip_id; + } default: break; } @@ -432,9 +437,11 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val) { trace_esp_mem_writeb(saddr, s->wregs[saddr], val); switch (saddr) { + case ESP_TCHI: + s->tchi_written = true; + /* fall through */ case ESP_TCLO: case ESP_TCMID: - case ESP_TCHI: s->rregs[ESP_RSTAT] &= ~STAT_TC; break; case ESP_FIFO: -- cgit v1.2.3 From 6012ca8159a7f78e6fc5122a8e9f22d82e9723e9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 12 Nov 2014 11:24:35 +0800 Subject: virtio-scsi: dataplane: suppress guest notification This patch uses vring_should_notify() to suppress guest notification, and looks notification frequency can be decreased from ~33K/sec to ~2K/sec in my test environment. Suggested-by: Stefan Hajnoczi Signed-off-by: Ming Lei Signed-off-by: Paolo Bonzini --- hw/scsi/virtio-scsi-dataplane.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 969b931557..03a1e8cfcf 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -92,9 +92,14 @@ VirtIOSCSIReq *virtio_scsi_pop_req_vring(VirtIOSCSI *s, void virtio_scsi_vring_push_notify(VirtIOSCSIReq *req) { + VirtIODevice *vdev = VIRTIO_DEVICE(req->vring->parent); + vring_push(&req->vring->vring, &req->elem, req->qsgl.size + req->resp_iov.size); - event_notifier_set(&req->vring->guest_notifier); + + if (vring_should_notify(vdev, &req->vring->vring)) { + event_notifier_set(&req->vring->guest_notifier); + } } static void virtio_scsi_iothread_handle_ctrl(EventNotifier *notifier) -- cgit v1.2.3 From c2c00148ec54f77c9432fec16585834e1d677fda Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Thu, 28 Aug 2014 15:18:57 +0400 Subject: apic_common: migrate missing fields This patch adds missed sipi_vector and wait_for_sipi fields to a new subsection of the vmstate of the apic_common module. Saving and loading of these fields makes migration of the apic state deterministic. Signed-off-by: Pavel Dovgalyuk [Initialize the field in pre_load and kvm_apic_realize. - Paolo] Signed-off-by: Paolo Bonzini --- hw/i386/kvm/apic.c | 3 +++ hw/intc/apic_common.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'hw') diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index e873b509a5..271e97f86f 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -175,6 +175,9 @@ static void kvm_apic_realize(DeviceState *dev, Error **errp) { APICCommonState *s = APIC_COMMON(dev); + /* Not used by KVM, which uses the CPU mp_state instead. */ + s->wait_for_sipi = 0; + memory_region_init_io(&s->io_memory, NULL, &kvm_apic_io_ops, s, "kvm-apic-msi", APIC_SPACE_SIZE); diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index ce3d903b13..4e62f25edb 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -324,6 +324,19 @@ static void apic_common_realize(DeviceState *dev, Error **errp) } +static int apic_pre_load(void *opaque) +{ + APICCommonState *s = APIC_COMMON(opaque); + + /* The default is !cpu_is_bsp(s->cpu), but the common value is 0 + * so that's what apic_common_sipi_needed checks for. Reset to + * the value that is assumed when the apic_sipi subsection is + * absent. + */ + s->wait_for_sipi = 0; + return 0; +} + static void apic_dispatch_pre_save(void *opaque) { APICCommonState *s = APIC_COMMON(opaque); @@ -345,12 +358,30 @@ static int apic_dispatch_post_load(void *opaque, int version_id) return 0; } +static bool apic_common_sipi_needed(void *opaque) +{ + APICCommonState *s = APIC_COMMON(opaque); + return s->wait_for_sipi != 0; +} + +static const VMStateDescription vmstate_apic_common_sipi = { + .name = "apic_sipi", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_INT32(sipi_vector, APICCommonState), + VMSTATE_INT32(wait_for_sipi, APICCommonState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_apic_common = { .name = "apic", .version_id = 3, .minimum_version_id = 3, .minimum_version_id_old = 1, .load_state_old = apic_load_old, + .pre_load = apic_pre_load, .pre_save = apic_dispatch_pre_save, .post_load = apic_dispatch_post_load, .fields = (VMStateField[]) { @@ -375,6 +406,13 @@ static const VMStateDescription vmstate_apic_common = { VMSTATE_INT64(timer_expiry, APICCommonState), /* open-coded timer state */ VMSTATE_END_OF_LIST() + }, + .subsections = (VMStateSubsection[]) { + { + .vmsd = &vmstate_apic_common_sipi, + .needed = apic_common_sipi_needed, + }, + VMSTATE_END_OF_LIST() } }; -- cgit v1.2.3 From 1154d84dcc5f46e83db94281d071775819dd8884 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 3 Nov 2014 15:45:34 -0200 Subject: kvmclock: Add comment explaining why we need cpu_clean_all_dirty() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Try to explain why commit 317b0a6d8ba44e9bf8f9c3dbd776c4536843d82c needed a cpu_clean_all_dirty() call just after calling cpu_synchronize_all_states(). Signed-off-by: Eduardo Habkost Cc: Andrey Korolyov Cc: Marcin Gibuła Cc: Marcelo Tosatti Cc: Paolo Bonzini Signed-off-by: Paolo Bonzini --- hw/i386/kvm/clock.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'hw') diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 1ac60d6cdd..58be2bda27 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -127,7 +127,21 @@ static void kvmclock_vm_state_change(void *opaque, int running, } cpu_synchronize_all_states(); + /* In theory, the cpu_synchronize_all_states() call above wouldn't + * affect the rest of the code, as the VCPU state inside CPUState + * is supposed to always match the VCPU state on the kernel side. + * + * In practice, calling cpu_synchronize_state() too soon will load the + * kernel-side APIC state into X86CPU.apic_state too early, APIC state + * won't be reloaded later because CPUState.vcpu_dirty==true, and + * outdated APIC state may be migrated to another host. + * + * The real fix would be to make sure outdated APIC state is read + * from the kernel again when necessary. While this is not fixed, we + * need the cpu_clean_all_dirty() call below. + */ cpu_clean_all_dirty(); + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); if (ret < 0) { fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); -- cgit v1.2.3 From f4ec5cd29d1f0d3a265039767399d2cf3e75950b Mon Sep 17 00:00:00 2001 From: SeokYeon Hwang Date: Wed, 5 Nov 2014 15:19:54 +0900 Subject: smbios: change 'ram_addr_t' variables to 'uint64_t' ram_addr_t should not be used except if referring to a RAMBlobk. Using 'uint64_t' avoids a -Wconstant-conversion warning, which clang >= 3.4 produces in "smbios_get_tables()". Signed-off-by: SeokYeon Hwang Signed-off-by: Paolo Bonzini --- hw/i386/smbios.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'hw') diff --git a/hw/i386/smbios.c b/hw/i386/smbios.c index 8a7ad48921..024e59445b 100644 --- a/hw/i386/smbios.c +++ b/hw/i386/smbios.c @@ -645,7 +645,7 @@ static void smbios_build_type_4_table(unsigned instance) static void smbios_build_type_16_table(unsigned dimm_cnt) { - ram_addr_t size_kb; + uint64_t size_kb; SMBIOS_BUILD_TABLE_PRE(16, 0x1000, true); /* required */ @@ -669,10 +669,10 @@ static void smbios_build_type_16_table(unsigned dimm_cnt) #define MAX_T17_STD_SZ 0x7FFF /* (32G - 1M), in Megabytes */ #define MAX_T17_EXT_SZ 0x80000000 /* 2P, in Megabytes */ -static void smbios_build_type_17_table(unsigned instance, ram_addr_t size) +static void smbios_build_type_17_table(unsigned instance, uint64_t size) { char loc_str[128]; - ram_addr_t size_mb; + uint64_t size_mb; SMBIOS_BUILD_TABLE_PRE(17, 0x1100 + instance, true); /* required */ @@ -711,9 +711,9 @@ static void smbios_build_type_17_table(unsigned instance, ram_addr_t size) } static void smbios_build_type_19_table(unsigned instance, - ram_addr_t start, ram_addr_t size) + uint64_t start, uint64_t size) { - ram_addr_t end, start_kb, end_kb; + uint64_t end, start_kb, end_kb; SMBIOS_BUILD_TABLE_PRE(19, 0x1300 + instance, true); /* required */ -- cgit v1.2.3 From 3ef0eab178e5120a0e1c079d163d5c71689d9b71 Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Fri, 7 Nov 2014 13:31:33 +0300 Subject: acpi: accurate overflow check Compare clock in ns, because acpi_pm_tmr_update uses rounded to ns value instead of ticks. Signed-off-by: Pavel Dovgalyuk [This lets Windows boot in icount mode. - Paolo] Signed-off-by: Paolo Bonzini --- hw/acpi/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/acpi/core.c b/hw/acpi/core.c index a7368fb242..51913d6932 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -376,8 +376,11 @@ static void acpi_notify_wakeup(Notifier *notifier, void *data) /* ACPI PM1a EVT */ uint16_t acpi_pm1_evt_get_sts(ACPIREGS *ar) { - int64_t d = acpi_pm_tmr_get_clock(); - if (d >= ar->tmr.overflow_time) { + /* Compare ns-clock, not PM timer ticks, because + acpi_pm_tmr_update function uses ns for setting the timer. */ + int64_t d = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (d >= muldiv64(ar->tmr.overflow_time, + get_ticks_per_sec(), PM_TIMER_FREQUENCY)) { ar->pm1.evt.sts |= ACPI_BITMASK_TIMER_STATUS; } return ar->pm1.evt.sts; -- cgit v1.2.3