author     Peter Maydell <peter.maydell@linaro.org>    2016-05-31 09:29:23 +0100
committer  Peter Maydell <peter.maydell@linaro.org>    2016-05-31 09:29:23 +0100
commit     07e070aac4eeb186905148461f331e43f2b828aa (patch)
tree       725db4314abc3e4975ec17d427d560cc0c1bbc13
parent     d6550e9ed2e1a60d889dfb721de00d9a4e3bafbe (diff)
parent     0878d0e11ba8013dd759c6921cbf05ba6a41bd71 (diff)
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* docs/atomics fixes and atomic_rcu_* optimization (Emilio)
* NBD bugfix (Eric)
* Memory fixes and cleanups (Paolo, Paul)
* scsi-block support for SCSI status, including persistent reservations (Paolo)
* kvm_stat moves to the Linux repository
* SCSI bug fixes (Peter, Prasad)
* Killing qemu_char_get_next_serial, non-ARM parts (Xiaoqiang)

# gpg: Signature made Sun 29 May 2016 08:11:20 BST using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"

* remotes/bonzini/tags/for-upstream: (30 commits)
  exec: hide mr->ram_addr from qemu_get_ram_ptr users
  memory: split memory_region_from_host from qemu_ram_addr_from_host
  exec: remove ram_addr argument from qemu_ram_block_from_host
  memory: remove qemu_get_ram_fd, qemu_set_ram_fd, qemu_ram_block_host_ptr
  scsi-generic: Merge block max xfer len in INQUIRY response
  scsi-block: always use SG_IO
  scsi-disk: introduce scsi_disk_req_check_error
  scsi-disk: add need_fua_emulation to SCSIDiskClass
  scsi-disk: introduce dma_readv and dma_writev
  scsi-disk: introduce a common base class
  xen-hvm: ignore background I/O sections
  docs/atomics: update comparison with Linux
  atomics: do not emit consume barrier for atomic_rcu_read
  atomics: emit an smp_read_barrier_depends() barrier only for Alpha and Thread Sanitizer
  docs/atomics: update atomic_read/set comparison with Linux
  bt: rewrite csrhci_write to avoid out-of-bounds writes
  block/iscsi: avoid potential overflow of acb->task->cdb
  scsi: megasas: check 'read_queue_head' index value
  scsi: megasas: initialise local configuration data buffer
  scsi: megasas: use appropriate property buffer size
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--  Makefile                      |    9
-rw-r--r--  block/iscsi.c                 |    7
-rw-r--r--  cputlb.c                      |    3
-rw-r--r--  docs/atomics.txt              |   38
-rw-r--r--  exec.c                        |  110
-rw-r--r--  hw/bt/hci-csr.c               |   67
-rw-r--r--  hw/char/escc.c                |   30
-rw-r--r--  hw/char/etraxfs_ser.c         |   27
-rw-r--r--  hw/char/lm32_juart.c          |   17
-rw-r--r--  hw/char/lm32_uart.c           |   28
-rw-r--r--  hw/char/milkymist-uart.c      |   10
-rw-r--r--  hw/cris/axis_dev88.c          |    4
-rw-r--r--  hw/lm32/lm32.h                |   19
-rw-r--r--  hw/lm32/lm32_boards.c         |    9
-rw-r--r--  hw/lm32/milkymist-hw.h        |    4
-rw-r--r--  hw/lm32/milkymist.c           |    4
-rw-r--r--  hw/misc/ivshmem.c             |    5
-rw-r--r--  hw/scsi/megasas.c             |    6
-rw-r--r--  hw/scsi/mptsas.c              |    9
-rw-r--r--  hw/scsi/scsi-disk.c           |  415
-rw-r--r--  hw/scsi/scsi-generic.c        |   12
-rw-r--r--  hw/scsi/vmw_pvscsi.c          |   24
-rw-r--r--  hw/virtio/vhost-user.c        |   25
-rw-r--r--  include/exec/cpu-common.h     |    4
-rw-r--r--  include/exec/memory.h         |   36
-rw-r--r--  include/exec/ram_addr.h       |    3
-rw-r--r--  include/hw/cris/etraxfs.h     |   16
-rw-r--r--  include/qemu/atomic.h         |   25
-rw-r--r--  memory.c                      |   43
-rw-r--r--  migration/postcopy-ram.c      |    3
-rw-r--r--  nbd/server.c                  |   20
-rw-r--r--  scripts/dump-guest-memory.py  |   19
-rwxr-xr-x  scripts/kvm/kvm_stat          |  825
-rw-r--r--  scripts/kvm/kvm_stat.texi     |   55
-rw-r--r--  target-i386/kvm.c             |    6
-rw-r--r--  xen-hvm.c                     |   14
36 files changed, 709 insertions, 1242 deletions
diff --git a/Makefile b/Makefile
index a5d7e62626..3a3c5dc647 100644
--- a/Makefile
+++ b/Makefile
@@ -92,9 +92,6 @@ HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)
ifdef BUILD_DOCS
DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
DOCS+=qmp-commands.txt
-ifdef CONFIG_LINUX
-DOCS+=kvm_stat.1
-endif
ifdef CONFIG_VIRTFS
DOCS+=fsdev/virtfs-proxy-helper.1
endif
@@ -571,12 +568,6 @@ qemu-ga.8: qemu-ga.texi
$(POD2MAN) --section=8 --center=" " --release=" " qemu-ga.pod > $@, \
" GEN $@")
-kvm_stat.1: scripts/kvm/kvm_stat.texi
- $(call quiet-command, \
- perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< kvm_stat.pod && \
- $(POD2MAN) --section=1 --center=" " --release=" " kvm_stat.pod > $@, \
- " GEN $@")
-
dvi: qemu-doc.dvi qemu-tech.dvi
html: qemu-doc.html qemu-tech.html
info: qemu-doc.info qemu-tech.info
diff --git a/block/iscsi.c b/block/iscsi.c
index 2ca8e72967..e7d5f7b0c3 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -833,6 +833,13 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
return &acb->common;
}
+ if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
+ error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
+ acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
+ qemu_aio_unref(acb);
+ return NULL;
+ }
+
acb->task = malloc(sizeof(struct scsi_task));
if (acb->task == NULL) {
error_report("iSCSI: Failed to allocate task for scsi command. %s",
diff --git a/cputlb.c b/cputlb.c
index 1ff6354b04..23c9b91200 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -246,7 +246,8 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
ram_addr_t ram_addr;
- if (qemu_ram_addr_from_host(ptr, &ram_addr) == NULL) {
+ ram_addr = qemu_ram_addr_from_host(ptr);
+ if (ram_addr == RAM_ADDR_INVALID) {
fprintf(stderr, "Bad ram pointer %p\n", ptr);
abort();
}
diff --git a/docs/atomics.txt b/docs/atomics.txt
index bba771ecd6..c95950b6c5 100644
--- a/docs/atomics.txt
+++ b/docs/atomics.txt
@@ -326,21 +326,41 @@ and memory barriers, and the equivalents in QEMU:
use a boxed atomic_t type; atomic operations in QEMU are polymorphic
and use normal C types.
-- atomic_read and atomic_set in Linux give no guarantee at all;
- atomic_read and atomic_set in QEMU include a compiler barrier
- (similar to the READ_ONCE/WRITE_ONCE macros in Linux).
-
-- most atomic read-modify-write operations in Linux return void;
- in QEMU, all of them return the old value of the variable.
+- Originally, atomic_read and atomic_set in Linux gave no guarantee
+ at all. Linux 4.1 updated them to implement volatile
+ semantics via ACCESS_ONCE (or the more recent READ/WRITE_ONCE).
+
+ QEMU's atomic_read/set implement, if the compiler supports it, C11
+ atomic relaxed semantics, and volatile semantics otherwise.
+ Both semantics prevent the compiler from doing certain transformations;
+ the difference is that atomic accesses are guaranteed to be atomic,
+ while volatile accesses aren't. Thus, in the volatile case we just cross
+ our fingers hoping that the compiler will generate atomic accesses,
+ since we assume the variables passed are machine-word sized and
+ properly aligned.
+ No barriers are implied by atomic_read/set in either Linux or QEMU.
+
+- atomic read-modify-write operations in Linux are of three kinds:
+
+ atomic_OP returns void
+ atomic_OP_return returns new value of the variable
+ atomic_fetch_OP returns the old value of the variable
+ atomic_cmpxchg returns the old value of the variable
+
+ In QEMU, the second kind does not exist. Currently Linux has
+ atomic_fetch_or only. QEMU provides and, or, inc, dec, add, sub.
- different atomic read-modify-write operations in Linux imply
a different set of memory barriers; in QEMU, all of them enforce
sequential consistency, which means they imply full memory barriers
before and after the operation.
-- Linux does not have an equivalent of atomic_mb_read() and
- atomic_mb_set(). In particular, note that set_mb() is a little
- weaker than atomic_mb_set().
+- Linux does not have an equivalent of atomic_mb_set(). In particular,
+ note that smp_store_mb() is a little weaker than atomic_mb_set().
+ atomic_mb_read() compiles to the same instructions as Linux's
+ smp_load_acquire(), but this should be treated as an implementation
+ detail. If required, QEMU might later add atomic_load_acquire() and
+ atomic_store_release() macros.
SOURCES
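The comparison above can be made concrete with a minimal, standalone C11 sketch
(not part of this merge, and using the standard <stdatomic.h> API rather than
QEMU's qemu/atomic.h macros): a relaxed load/store mirrors what the text says
about atomic_read/atomic_set, and a sequentially consistent fetch-and-add
mirrors the atomic_fetch_OP family, which returns the old value and implies
full barriers around the operation.

/* Illustration only -- not part of this patch series. */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic int counter;

int main(void)
{
    /* Relaxed store/load: the access is atomic, but no barriers are implied,
     * matching the description of QEMU's atomic_set()/atomic_read(). */
    atomic_store_explicit(&counter, 1, memory_order_relaxed);
    int v = atomic_load_explicit(&counter, memory_order_relaxed);

    /* Read-modify-write: returns the old value and is sequentially
     * consistent, i.e. a full barrier before and after the operation. */
    int old = atomic_fetch_add_explicit(&counter, 2, memory_order_seq_cst);

    printf("loaded %d, fetch_add saw %d, counter is now %d\n",
           v, old, atomic_load_explicit(&counter, memory_order_relaxed));
    return 0;
}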
diff --git a/exec.c b/exec.c
index a3a93aeed3..448882154f 100644
--- a/exec.c
+++ b/exec.c
@@ -1815,40 +1815,6 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
}
#endif /* !_WIN32 */
-int qemu_get_ram_fd(ram_addr_t addr)
-{
- RAMBlock *block;
- int fd;
-
- rcu_read_lock();
- block = qemu_get_ram_block(addr);
- fd = block->fd;
- rcu_read_unlock();
- return fd;
-}
-
-void qemu_set_ram_fd(ram_addr_t addr, int fd)
-{
- RAMBlock *block;
-
- rcu_read_lock();
- block = qemu_get_ram_block(addr);
- block->fd = fd;
- rcu_read_unlock();
-}
-
-void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
-{
- RAMBlock *block;
- void *ptr;
-
- rcu_read_lock();
- block = qemu_get_ram_block(addr);
- ptr = ramblock_ptr(block, 0);
- rcu_read_unlock();
- return ptr;
-}
-
/* Return a host pointer to ram allocated with qemu_ram_alloc.
* This should not be used for general purpose DMA. Use address_space_map
* or address_space_rw instead. For local memory (e.g. video ram) that the
@@ -1856,12 +1822,13 @@ void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
*
* Called within RCU critical section.
*/
-void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
+void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
{
RAMBlock *block = ram_block;
if (block == NULL) {
block = qemu_get_ram_block(addr);
+ addr -= block->offset;
}
if (xen_enabled() && block->host == NULL) {
@@ -1875,10 +1842,10 @@ void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
block->host = xen_map_cache(block->offset, block->max_length, 1);
}
- return ramblock_ptr(block, addr - block->offset);
+ return ramblock_ptr(block, addr);
}
-/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
+/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
* but takes a size argument.
*
* Called within RCU critical section.
@@ -1887,16 +1854,15 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
hwaddr *size)
{
RAMBlock *block = ram_block;
- ram_addr_t offset_inside_block;
if (*size == 0) {
return NULL;
}
if (block == NULL) {
block = qemu_get_ram_block(addr);
+ addr -= block->offset;
}
- offset_inside_block = addr - block->offset;
- *size = MIN(*size, block->max_length - offset_inside_block);
+ *size = MIN(*size, block->max_length - addr);
if (xen_enabled() && block->host == NULL) {
/* We need to check if the requested address is in the RAM
@@ -1910,7 +1876,7 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
block->host = xen_map_cache(block->offset, block->max_length, 1);
}
- return ramblock_ptr(block, offset_inside_block);
+ return ramblock_ptr(block, addr);
}
/*
@@ -1931,16 +1897,16 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
* ram_addr_t.
*/
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
- ram_addr_t *ram_addr,
ram_addr_t *offset)
{
RAMBlock *block;
uint8_t *host = ptr;
if (xen_enabled()) {
+ ram_addr_t ram_addr;
rcu_read_lock();
- *ram_addr = xen_ram_addr_from_mapcache(ptr);
- block = qemu_get_ram_block(*ram_addr);
+ ram_addr = xen_ram_addr_from_mapcache(ptr);
+ block = qemu_get_ram_block(ram_addr);
if (block) {
*offset = (host - block->host);
}
@@ -1972,7 +1938,6 @@ found:
if (round_offset) {
*offset &= TARGET_PAGE_MASK;
}
- *ram_addr = block->offset + *offset;
rcu_read_unlock();
return block;
}
@@ -1999,18 +1964,17 @@ RAMBlock *qemu_ram_block_by_name(const char *name)
/* Some of the softmmu routines need to translate from a host pointer
(typically a TLB entry) back to a ram offset. */
-MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
+ram_addr_t qemu_ram_addr_from_host(void *ptr)
{
RAMBlock *block;
- ram_addr_t offset; /* Not used */
-
- block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
+ ram_addr_t offset;
+ block = qemu_ram_block_from_host(ptr, false, &offset);
if (!block) {
- return NULL;
+ return RAM_ADDR_INVALID;
}
- return block->mr;
+ return block->offset + offset;
}
/* Called within RCU critical section. */
@@ -2022,13 +1986,13 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
}
switch (size) {
case 1:
- stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
+ stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
break;
case 2:
- stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
+ stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
break;
case 4:
- stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
+ stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
break;
default:
abort();
@@ -2490,6 +2454,8 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
hwaddr length)
{
uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
+ addr += memory_region_get_ram_addr(mr);
+
/* No early return if dirty_log_mask is or becomes 0, because
* cpu_physical_memory_set_dirty_range will still call
* xen_modified_memory.
@@ -2602,9 +2568,8 @@ static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
abort();
}
} else {
- addr1 += memory_region_get_ram_addr(mr);
/* RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
memcpy(ptr, buf, l);
invalidate_and_set_dirty(mr, addr1, l);
}
@@ -2695,8 +2660,7 @@ MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
}
} else {
/* RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block,
- memory_region_get_ram_addr(mr) + addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
memcpy(buf, ptr, l);
}
@@ -2779,9 +2743,8 @@ static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
memory_region_is_romd(mr))) {
l = memory_access_size(mr, l, addr1);
} else {
- addr1 += memory_region_get_ram_addr(mr);
/* ROM/RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (type) {
case WRITE_DATA:
memcpy(ptr, buf, l);
@@ -2939,7 +2902,6 @@ void *address_space_map(AddressSpace *as,
hwaddr done = 0;
hwaddr l, xlat, base;
MemoryRegion *mr, *this_mr;
- ram_addr_t raddr;
void *ptr;
if (len == 0) {
@@ -2974,7 +2936,6 @@ void *address_space_map(AddressSpace *as,
}
base = xlat;
- raddr = memory_region_get_ram_addr(mr);
for (;;) {
len -= l;
@@ -2993,7 +2954,7 @@ void *address_space_map(AddressSpace *as,
memory_region_ref(mr);
*plen = done;
- ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
+ ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
rcu_read_unlock();
return ptr;
@@ -3010,7 +2971,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
MemoryRegion *mr;
ram_addr_t addr1;
- mr = qemu_ram_addr_from_host(buffer, &addr1);
+ mr = memory_region_from_host(buffer, &addr1);
assert(mr != NULL);
if (is_write) {
invalidate_and_set_dirty(mr, addr1, access_len);
@@ -3077,8 +3038,7 @@ static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
#endif
} else {
/* RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block,
- memory_region_get_ram_addr(mr) + addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
val = ldl_le_p(ptr);
@@ -3171,8 +3131,7 @@ static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
#endif
} else {
/* RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block,
- memory_region_get_ram_addr(mr) + addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
val = ldq_le_p(ptr);
@@ -3285,8 +3244,7 @@ static inline uint32_t address_space_lduw_internal(AddressSpace *as,
#endif
} else {
/* RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block,
- memory_region_get_ram_addr(mr) + addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
val = lduw_le_p(ptr);
@@ -3368,13 +3326,13 @@ void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
} else {
- addr1 += memory_region_get_ram_addr(mr);
- ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
stl_p(ptr, val);
dirty_log_mask = memory_region_get_dirty_log_mask(mr);
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
- cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
+ cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
+ 4, dirty_log_mask);
r = MEMTX_OK;
}
if (result) {
@@ -3423,8 +3381,7 @@ static inline void address_space_stl_internal(AddressSpace *as,
r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
} else {
/* RAM case */
- addr1 += memory_region_get_ram_addr(mr);
- ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
stl_le_p(ptr, val);
@@ -3533,8 +3490,7 @@ static inline void address_space_stw_internal(AddressSpace *as,
r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
} else {
/* RAM case */
- addr1 += memory_region_get_ram_addr(mr);
- ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
stw_le_p(ptr, val);
diff --git a/hw/bt/hci-csr.c b/hw/bt/hci-csr.c
index e6b8998253..d688372ca3 100644
--- a/hw/bt/hci-csr.c
+++ b/hw/bt/hci-csr.c
@@ -39,9 +39,14 @@ struct csrhci_s {
int out_size;
uint8_t outfifo[FIFO_LEN * 2];
uint8_t inpkt[FIFO_LEN];
+ enum {
+ CSR_HDR_LEN,
+ CSR_DATA_LEN,
+ CSR_DATA
+ } in_state;
int in_len;
int in_hdr;
- int in_data;
+ int in_needed;
QEMUTimer *out_tm;
int64_t baud_delay;
@@ -296,38 +301,60 @@ static int csrhci_data_len(const uint8_t *pkt)
exit(-1);
}
+static void csrhci_ready_for_next_inpkt(struct csrhci_s *s)
+{
+ s->in_state = CSR_HDR_LEN;
+ s->in_len = 0;
+ s->in_needed = 2;
+ s->in_hdr = INT_MAX;
+}
+
static int csrhci_write(struct CharDriverState *chr,
const uint8_t *buf, int len)
{
struct csrhci_s *s = (struct csrhci_s *) chr->opaque;
- int plen = s->in_len;
+ int total = 0;
if (!s->enable)
return 0;
- s->in_len += len;
- memcpy(s->inpkt + plen, buf, len);
+ for (;;) {
+ int cnt = MIN(len, s->in_needed - s->in_len);
+ if (cnt) {
+ memcpy(s->inpkt + s->in_len, buf, cnt);
+ s->in_len += cnt;
+ buf += cnt;
+ len -= cnt;
+ total += cnt;
+ }
+
+ if (s->in_len < s->in_needed) {
+ break;
+ }
- while (1) {
- if (s->in_len >= 2 && plen < 2)
+ if (s->in_state == CSR_HDR_LEN) {
s->in_hdr = csrhci_header_len(s->inpkt) + 1;
+ assert(s->in_hdr >= s->in_needed);
+ s->in_needed = s->in_hdr;
+ s->in_state = CSR_DATA_LEN;
+ continue;
+ }
- if (s->in_len >= s->in_hdr && plen < s->in_hdr)
- s->in_data = csrhci_data_len(s->inpkt) + s->in_hdr;
+ if (s->in_state == CSR_DATA_LEN) {
+ s->in_needed += csrhci_data_len(s->inpkt);
+ /* hci_acl_hdr could specify more than 4096 bytes, so assert. */
+ assert(s->in_needed <= sizeof(s->inpkt));
+ s->in_state = CSR_DATA;
+ continue;
+ }
- if (s->in_len >= s->in_data) {
+ if (s->in_state == CSR_DATA) {
csrhci_in_packet(s, s->inpkt);
-
- memmove(s->inpkt, s->inpkt + s->in_len, s->in_len - s->in_data);
- s->in_len -= s->in_data;
- s->in_hdr = INT_MAX;
- s->in_data = INT_MAX;
- plen = 0;
- } else
- break;
+ csrhci_ready_for_next_inpkt(s);
+ }
}
- return len;
+ return total;
}
static void csrhci_out_hci_packet_event(void *opaque,
@@ -389,11 +416,9 @@ static void csrhci_reset(struct csrhci_s *s)
{
s->out_len = 0;
s->out_size = FIFO_LEN;
- s->in_len = 0;
+ csrhci_ready_for_next_inpkt(s);
s->baud_delay = NANOSECONDS_PER_SECOND;
s->enable = 0;
- s->in_hdr = INT_MAX;
- s->in_data = INT_MAX;
s->modem_state = 0;
/* After a while... (but sooner than 10ms) */
diff --git a/hw/char/escc.c b/hw/char/escc.c
index 7bf09a0077..8e6a7df465 100644
--- a/hw/char/escc.c
+++ b/hw/char/escc.c
@@ -983,9 +983,10 @@ void slavio_serial_ms_kbd_init(hwaddr base, qemu_irq irq,
sysbus_mmio_map(s, 0, base);
}
-static int escc_init1(SysBusDevice *dev)
+static void escc_init1(Object *obj)
{
- ESCCState *s = ESCC(dev);
+ ESCCState *s = ESCC(obj);
+ SysBusDevice *dev = SYS_BUS_DEVICE(obj);
unsigned int i;
s->chn[0].disabled = s->disabled;
@@ -994,17 +995,26 @@ static int escc_init1(SysBusDevice *dev)
sysbus_init_irq(dev, &s->chn[i].irq);
s->chn[i].chn = 1 - i;
s->chn[i].clock = s->frequency / 2;
- if (s->chn[i].chr) {
- qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
- serial_receive1, serial_event, &s->chn[i]);
- }
}
s->chn[0].otherchn = &s->chn[1];
s->chn[1].otherchn = &s->chn[0];
- memory_region_init_io(&s->mmio, OBJECT(s), &escc_mem_ops, s, "escc",
+ memory_region_init_io(&s->mmio, obj, &escc_mem_ops, s, "escc",
ESCC_SIZE << s->it_shift);
sysbus_init_mmio(dev, &s->mmio);
+}
+
+static void escc_realize(DeviceState *dev, Error **errp)
+{
+ ESCCState *s = ESCC(dev);
+ unsigned int i;
+
+ for (i = 0; i < 2; i++) {
+ if (s->chn[i].chr) {
+ qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
+ serial_receive1, serial_event, &s->chn[i]);
+ }
+ }
if (s->chn[0].type == mouse) {
qemu_add_mouse_event_handler(sunmouse_event, &s->chn[0], 0,
@@ -1014,8 +1024,6 @@ static int escc_init1(SysBusDevice *dev)
s->chn[1].hs = qemu_input_handler_register((DeviceState *)(&s->chn[1]),
&sunkbd_handler);
}
-
- return 0;
}
static Property escc_properties[] = {
@@ -1032,10 +1040,9 @@ static Property escc_properties[] = {
static void escc_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
- k->init = escc_init1;
dc->reset = escc_reset;
+ dc->realize = escc_realize;
dc->vmsd = &vmstate_escc;
dc->props = escc_properties;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
@@ -1045,6 +1052,7 @@ static const TypeInfo escc_info = {
.name = TYPE_ESCC,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(ESCCState),
+ .instance_init = escc_init1,
.class_init = escc_class_init,
};
diff --git a/hw/char/etraxfs_ser.c b/hw/char/etraxfs_ser.c
index 146b387e7e..04ca04fe2c 100644
--- a/hw/char/etraxfs_ser.c
+++ b/hw/char/etraxfs_ser.c
@@ -159,6 +159,11 @@ static const MemoryRegionOps ser_ops = {
}
};
+static Property etraxfs_ser_properties[] = {
+ DEFINE_PROP_CHR("chardev", ETRAXSerial, chr),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
static void serial_receive(void *opaque, const uint8_t *buf, int size)
{
ETRAXSerial *s = opaque;
@@ -209,40 +214,42 @@ static void etraxfs_ser_reset(DeviceState *d)
}
-static int etraxfs_ser_init(SysBusDevice *dev)
+static void etraxfs_ser_init(Object *obj)
{
- ETRAXSerial *s = ETRAX_SERIAL(dev);
+ ETRAXSerial *s = ETRAX_SERIAL(obj);
+ SysBusDevice *dev = SYS_BUS_DEVICE(obj);
sysbus_init_irq(dev, &s->irq);
- memory_region_init_io(&s->mmio, OBJECT(s), &ser_ops, s,
+ memory_region_init_io(&s->mmio, obj, &ser_ops, s,
"etraxfs-serial", R_MAX * 4);
sysbus_init_mmio(dev, &s->mmio);
+}
+
+static void etraxfs_ser_realize(DeviceState *dev, Error **errp)
+{
+ ETRAXSerial *s = ETRAX_SERIAL(dev);
- /* FIXME use a qdev chardev prop instead of qemu_char_get_next_serial() */
- s->chr = qemu_char_get_next_serial();
if (s->chr) {
qemu_chr_add_handlers(s->chr,
serial_can_receive, serial_receive,
serial_event, s);
}
- return 0;
}
static void etraxfs_ser_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
- k->init = etraxfs_ser_init;
dc->reset = etraxfs_ser_reset;
- /* Reason: init() method uses qemu_char_get_next_serial() */
- dc->cannot_instantiate_with_device_add_yet = true;
+ dc->props = etraxfs_ser_properties;
+ dc->realize = etraxfs_ser_realize;
}
static const TypeInfo etraxfs_ser_info = {
.name = TYPE_ETRAX_FS_SERIAL,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(ETRAXSerial),
+ .instance_init = etraxfs_ser_init,
.class_init = etraxfs_ser_class_init,
};
diff --git a/hw/char/lm32_juart.c b/hw/char/lm32_juart.c
index 5bf8acfe8f..28c2cf702d 100644
--- a/hw/char/lm32_juart.c
+++ b/hw/char/lm32_juart.c
@@ -114,17 +114,13 @@ static void juart_reset(DeviceState *d)
s->jrx = 0;
}
-static int lm32_juart_init(SysBusDevice *dev)
+static void lm32_juart_realize(DeviceState *dev, Error **errp)
{
LM32JuartState *s = LM32_JUART(dev);
- /* FIXME use a qdev chardev prop instead of qemu_char_get_next_serial() */
- s->chr = qemu_char_get_next_serial();
if (s->chr) {
qemu_chr_add_handlers(s->chr, juart_can_rx, juart_rx, juart_event, s);
}
-
- return 0;
}
static const VMStateDescription vmstate_lm32_juart = {
@@ -138,16 +134,19 @@ static const VMStateDescription vmstate_lm32_juart = {
}
};
+static Property lm32_juart_properties[] = {
+ DEFINE_PROP_CHR("chardev", LM32JuartState, chr),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
static void lm32_juart_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
- k->init = lm32_juart_init;
dc->reset = juart_reset;
dc->vmsd = &vmstate_lm32_juart;
- /* Reason: init() method uses qemu_char_get_next_serial() */
- dc->cannot_instantiate_with_device_add_yet = true;
+ dc->props = lm32_juart_properties;
+ dc->realize = lm32_juart_realize;
}
static const TypeInfo lm32_juart_info = {
diff --git a/hw/char/lm32_uart.c b/hw/char/lm32_uart.c
index 036813d0f3..b5c760dda3 100644
--- a/hw/char/lm32_uart.c
+++ b/hw/char/lm32_uart.c
@@ -249,23 +249,25 @@ static void uart_reset(DeviceState *d)
s->regs[R_LSR] = LSR_THRE | LSR_TEMT;
}
-static int lm32_uart_init(SysBusDevice *dev)
+static void lm32_uart_init(Object *obj)
{
- LM32UartState *s = LM32_UART(dev);
+ LM32UartState *s = LM32_UART(obj);
+ SysBusDevice *dev = SYS_BUS_DEVICE(obj);
sysbus_init_irq(dev, &s->irq);
- memory_region_init_io(&s->iomem, OBJECT(s), &uart_ops, s,
+ memory_region_init_io(&s->iomem, obj, &uart_ops, s,
"uart", R_MAX * 4);
sysbus_init_mmio(dev, &s->iomem);
+}
+
+static void lm32_uart_realize(DeviceState *dev, Error **errp)
+{
+ LM32UartState *s = LM32_UART(dev);
- /* FIXME use a qdev chardev prop instead of qemu_char_get_next_serial() */
- s->chr = qemu_char_get_next_serial();
if (s->chr) {
qemu_chr_add_handlers(s->chr, uart_can_rx, uart_rx, uart_event, s);
}
-
- return 0;
}
static const VMStateDescription vmstate_lm32_uart = {
@@ -278,22 +280,26 @@ static const VMStateDescription vmstate_lm32_uart = {
}
};
+static Property lm32_uart_properties[] = {
+ DEFINE_PROP_CHR("chardev", LM32UartState, chr),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
static void lm32_uart_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
- k->init = lm32_uart_init;
dc->reset = uart_reset;
dc->vmsd = &vmstate_lm32_uart;
- /* Reason: init() method uses qemu_char_get_next_serial() */
- dc->cannot_instantiate_with_device_add_yet = true;
+ dc->props = lm32_uart_properties;
+ dc->realize = lm32_uart_realize;
}
static const TypeInfo lm32_uart_info = {
.name = TYPE_LM32_UART,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(LM32UartState),
+ .instance_init = lm32_uart_init,
.class_init = lm32_uart_class_init,
};
diff --git a/hw/char/milkymist-uart.c b/hw/char/milkymist-uart.c
index 03b36b2236..72f8484668 100644
--- a/hw/char/milkymist-uart.c
+++ b/hw/char/milkymist-uart.c
@@ -200,8 +200,6 @@ static void milkymist_uart_realize(DeviceState *dev, Error **errp)
{
MilkymistUartState *s = MILKYMIST_UART(dev);
- /* FIXME use a qdev chardev prop instead of qemu_char_get_next_serial() */
- s->chr = qemu_char_get_next_serial();
if (s->chr) {
qemu_chr_add_handlers(s->chr, uart_can_rx, uart_rx, uart_event, s);
}
@@ -229,6 +227,11 @@ static const VMStateDescription vmstate_milkymist_uart = {
}
};
+static Property milkymist_uart_properties[] = {
+ DEFINE_PROP_CHR("chardev", MilkymistUartState, chr),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
static void milkymist_uart_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -236,8 +239,7 @@ static void milkymist_uart_class_init(ObjectClass *klass, void *data)
dc->realize = milkymist_uart_realize;
dc->reset = milkymist_uart_reset;
dc->vmsd = &vmstate_milkymist_uart;
- /* Reason: realize() method uses qemu_char_get_next_serial() */
- dc->cannot_instantiate_with_device_add_yet = true;
+ dc->props = milkymist_uart_properties;
}
static const TypeInfo milkymist_uart_info = {
diff --git a/hw/cris/axis_dev88.c b/hw/cris/axis_dev88.c
index 9f58658741..60df8877c1 100644
--- a/hw/cris/axis_dev88.c
+++ b/hw/cris/axis_dev88.c
@@ -37,6 +37,7 @@
#include "sysemu/block-backend.h"
#include "exec/address-spaces.h"
#include "sysemu/qtest.h"
+#include "sysemu/sysemu.h"
#define D(x)
#define DNAND(x)
@@ -341,8 +342,7 @@ void axisdev88_init(MachineState *machine)
sysbus_create_varargs("etraxfs,timer", 0x3005e000, irq[0x1b], nmi[1], NULL);
for (i = 0; i < 4; i++) {
- sysbus_create_simple("etraxfs,serial", 0x30026000 + i * 0x2000,
- irq[0x14 + i]);
+ etraxfs_ser_create(0x30026000 + i * 0x2000, irq[0x14 + i], serial_hds[i]);
}
if (kernel_filename) {
diff --git a/hw/lm32/lm32.h b/hw/lm32/lm32.h
index 18aa6fdc15..e338bfeae5 100644
--- a/hw/lm32/lm32.h
+++ b/hw/lm32/lm32.h
@@ -16,14 +16,31 @@ static inline DeviceState *lm32_pic_init(qemu_irq cpu_irq)
return dev;
}
-static inline DeviceState *lm32_juart_init(void)
+static inline DeviceState *lm32_juart_init(CharDriverState *chr)
{
DeviceState *dev;
dev = qdev_create(NULL, TYPE_LM32_JUART);
+ qdev_prop_set_chr(dev, "chardev", chr);
qdev_init_nofail(dev);
return dev;
}
+static inline DeviceState *lm32_uart_create(hwaddr addr,
+ qemu_irq irq,
+ CharDriverState *chr)
+{
+ DeviceState *dev;
+ SysBusDevice *s;
+
+ dev = qdev_create(NULL, "lm32-uart");
+ s = SYS_BUS_DEVICE(dev);
+ qdev_prop_set_chr(dev, "chardev", chr);
+ qdev_init_nofail(dev);
+ sysbus_mmio_map(s, 0, addr);
+ sysbus_connect_irq(s, 0, irq);
+ return dev;
+}
+
#endif
diff --git a/hw/lm32/lm32_boards.c b/hw/lm32/lm32_boards.c
index c0290560fc..8f0c3079d6 100644
--- a/hw/lm32/lm32_boards.c
+++ b/hw/lm32/lm32_boards.c
@@ -31,6 +31,7 @@
#include "lm32_hwsetup.h"
#include "lm32.h"
#include "exec/address-spaces.h"
+#include "sysemu/sysemu.h"
typedef struct {
LM32CPU *cpu;
@@ -131,12 +132,12 @@ static void lm32_evr_init(MachineState *machine)
irq[i] = qdev_get_gpio_in(env->pic_state, i);
}
- sysbus_create_simple("lm32-uart", uart0_base, irq[uart0_irq]);
+ lm32_uart_create(uart0_base, irq[uart0_irq], serial_hds[0]);
sysbus_create_simple("lm32-timer", timer0_base, irq[timer0_irq]);
sysbus_create_simple("lm32-timer", timer1_base, irq[timer1_irq]);
/* make sure juart isn't the first chardev */
- env->juart_state = lm32_juart_init();
+ env->juart_state = lm32_juart_init(serial_hds[1]);
reset_info->bootstrap_pc = flash_base;
@@ -232,13 +233,13 @@ static void lm32_uclinux_init(MachineState *machine)
irq[i] = qdev_get_gpio_in(env->pic_state, i);
}
- sysbus_create_simple("lm32-uart", uart0_base, irq[uart0_irq]);
+ lm32_uart_create(uart0_base, irq[uart0_irq], serial_hds[0]);
sysbus_create_simple("lm32-timer", timer0_base, irq[timer0_irq]);
sysbus_create_simple("lm32-timer", timer1_base, irq[timer1_irq]);
sysbus_create_simple("lm32-timer", timer2_base, irq[timer2_irq]);
/* make sure juart isn't the first chardev */
- env->juart_state = lm32_juart_init();
+ env->juart_state = lm32_juart_init(serial_hds[1]);
reset_info->bootstrap_pc = flash_base;
diff --git a/hw/lm32/milkymist-hw.h b/hw/lm32/milkymist-hw.h
index f857d2846f..eb6a3a2559 100644
--- a/hw/lm32/milkymist-hw.h
+++ b/hw/lm32/milkymist-hw.h
@@ -5,11 +5,13 @@
#include "net/net.h"
static inline DeviceState *milkymist_uart_create(hwaddr base,
- qemu_irq irq)
+ qemu_irq irq,
+ CharDriverState *chr)
{
DeviceState *dev;
dev = qdev_create(NULL, "milkymist-uart");
+ qdev_prop_set_chr(dev, "chardev", chr);
qdev_init_nofail(dev);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq);
diff --git a/hw/lm32/milkymist.c b/hw/lm32/milkymist.c
index 1abdf6e2e6..5cae0f19dd 100644
--- a/hw/lm32/milkymist.c
+++ b/hw/lm32/milkymist.c
@@ -159,7 +159,7 @@ milkymist_init(MachineState *machine)
}
g_free(bios_filename);
- milkymist_uart_create(0x60000000, irq[0]);
+ milkymist_uart_create(0x60000000, irq[0], serial_hds[0]);
milkymist_sysctl_create(0x60001000, irq[1], irq[2], irq[3],
80000000, 0x10014d31, 0x0000041f, 0x00000001);
milkymist_hpdmc_create(0x60002000);
@@ -175,7 +175,7 @@ milkymist_init(MachineState *machine)
0x20000000, 0x1000, 0x20020000, 0x2000);
/* make sure juart isn't the first chardev */
- env->juart_state = lm32_juart_init();
+ env->juart_state = lm32_juart_init(serial_hds[1]);
if (kernel_filename) {
uint64_t entry;
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index e40f23bfc2..90be9f7617 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -33,7 +33,6 @@
#include "sysemu/hostmem.h"
#include "sysemu/qtest.h"
#include "qapi/visitor.h"
-#include "exec/ram_addr.h"
#include "hw/misc/ivshmem.h"
@@ -533,7 +532,7 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
}
memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s),
"ivshmem.bar2", size, ptr);
- qemu_set_ram_fd(memory_region_get_ram_addr(&s->server_bar2), fd);
+ memory_region_set_fd(&s->server_bar2, fd);
s->ivshmem_bar2 = &s->server_bar2;
}
@@ -940,7 +939,7 @@ static void ivshmem_exit(PCIDevice *dev)
strerror(errno));
}
- fd = qemu_get_ram_fd(memory_region_get_ram_addr(s->ivshmem_bar2));
+ fd = memory_region_get_fd(s->ivshmem_bar2);
close(fd);
}
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index a63a581550..cc66d36186 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -650,7 +650,9 @@ static int megasas_init_firmware(MegasasState *s, MegasasCmd *cmd)
pa_hi = le32_to_cpu(initq->pi_addr_hi);
s->producer_pa = ((uint64_t) pa_hi << 32) | pa_lo;
s->reply_queue_head = ldl_le_pci_dma(pcid, s->producer_pa);
+ s->reply_queue_head %= MEGASAS_MAX_FRAMES;
s->reply_queue_tail = ldl_le_pci_dma(pcid, s->consumer_pa);
+ s->reply_queue_tail %= MEGASAS_MAX_FRAMES;
flags = le32_to_cpu(initq->flags);
if (flags & MFI_QUEUE_FLAG_CONTEXT64) {
s->flags |= MEGASAS_MASK_USE_QUEUE64;
@@ -1293,7 +1295,7 @@ static int megasas_dcmd_ld_get_info(MegasasState *s, MegasasCmd *cmd)
static int megasas_dcmd_cfg_read(MegasasState *s, MegasasCmd *cmd)
{
- uint8_t data[4096];
+ uint8_t data[4096] = { 0 };
struct mfi_config_data *info;
int num_pd_disks = 0, array_offset, ld_offset;
BusChild *kid;
@@ -1446,7 +1448,7 @@ static int megasas_dcmd_set_properties(MegasasState *s, MegasasCmd *cmd)
dcmd_size);
return MFI_STAT_INVALID_PARAMETER;
}
- dma_buf_write((uint8_t *)&info, cmd->iov_size, &cmd->qsg);
+ dma_buf_write((uint8_t *)&info, dcmd_size, &cmd->qsg);
trace_megasas_dcmd_unsupported(cmd->index, cmd->iov_size);
return MFI_STAT_OK;
}
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
index 499c1465ae..be88e161a9 100644
--- a/hw/scsi/mptsas.c
+++ b/hw/scsi/mptsas.c
@@ -754,11 +754,6 @@ static void mptsas_fetch_request(MPTSASState *s)
hwaddr addr;
int size;
- if (s->state != MPI_IOC_STATE_OPERATIONAL) {
- mptsas_set_fault(s, MPI_IOCSTATUS_INVALID_STATE);
- return;
- }
-
/* Read the message header from the guest first. */
addr = s->host_mfa_high_addr | MPTSAS_FIFO_GET(s, request_post);
pci_dma_read(pci, addr, req, sizeof(hdr));
@@ -789,6 +784,10 @@ static void mptsas_fetch_requests(void *opaque)
{
MPTSASState *s = opaque;
+ if (s->state != MPI_IOC_STATE_OPERATIONAL) {
+ mptsas_set_fault(s, MPI_IOCSTATUS_INVALID_STATE);
+ return;
+ }
while (!MPTSAS_FIFO_EMPTY(s, request_post)) {
mptsas_fetch_request(s);
}
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 8865da53e8..ace65e0720 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -53,7 +53,21 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
#define DEFAULT_MAX_UNMAP_SIZE (1 << 30) /* 1 GB */
#define DEFAULT_MAX_IO_SIZE INT_MAX /* 2 GB - 1 block */
-typedef struct SCSIDiskState SCSIDiskState;
+#define TYPE_SCSI_DISK_BASE "scsi-disk-base"
+
+#define SCSI_DISK_BASE(obj) \
+ OBJECT_CHECK(SCSIDiskState, (obj), TYPE_SCSI_DISK_BASE)
+#define SCSI_DISK_BASE_CLASS(klass) \
+ OBJECT_CLASS_CHECK(SCSIDiskClass, (klass), TYPE_SCSI_DISK_BASE)
+#define SCSI_DISK_BASE_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(SCSIDiskClass, (obj), TYPE_SCSI_DISK_BASE)
+
+typedef struct SCSIDiskClass {
+ SCSIDeviceClass parent_class;
+ DMAIOFunc *dma_readv;
+ DMAIOFunc *dma_writev;
+ bool (*need_fua_emulation)(SCSICommand *cmd);
+} SCSIDiskClass;
typedef struct SCSIDiskReq {
SCSIRequest req;
@@ -62,16 +76,18 @@ typedef struct SCSIDiskReq {
uint32_t sector_count;
uint32_t buflen;
bool started;
+ bool need_fua_emulation;
struct iovec iov;
QEMUIOVector qiov;
BlockAcctCookie acct;
+ unsigned char *status;
} SCSIDiskReq;
#define SCSI_DISK_F_REMOVABLE 0
#define SCSI_DISK_F_DPOFUA 1
#define SCSI_DISK_F_NO_REMOVABLE_DEVOPS 2
-struct SCSIDiskState
+typedef struct SCSIDiskState
{
SCSIDevice qdev;
uint32_t features;
@@ -88,7 +104,7 @@ struct SCSIDiskState
char *product;
bool tray_open;
bool tray_locked;
-};
+} SCSIDiskState;
static int scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed);
@@ -161,6 +177,29 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
qemu_iovec_init_external(&r->qiov, &r->iov, 1);
}
+static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
+{
+ if (r->req.io_canceled) {
+ scsi_req_cancel_complete(&r->req);
+ return true;
+ }
+
+ if (ret < 0) {
+ return scsi_handle_rw_error(r, -ret, acct_failed);
+ }
+
+ if (r->status && *r->status) {
+ if (acct_failed) {
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
+ }
+ scsi_req_complete(&r->req, *r->status);
+ return true;
+ }
+
+ return false;
+}
+
static void scsi_aio_complete(void *opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
@@ -168,17 +207,10 @@ static void scsi_aio_complete(void *opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- if (r->req.io_canceled) {
- scsi_req_cancel_complete(&r->req);
+ if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
- if (ret < 0) {
- if (scsi_handle_rw_error(r, -ret, true)) {
- goto done;
- }
- }
-
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
scsi_req_complete(&r->req, GOOD);
@@ -217,13 +249,9 @@ static void scsi_write_do_fua(SCSIDiskReq *r)
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
assert(r->req.aiocb == NULL);
+ assert(!r->req.io_canceled);
- if (r->req.io_canceled) {
- scsi_req_cancel_complete(&r->req);
- goto done;
- }
-
- if (scsi_is_cmd_fua(&r->req.cmd)) {
+ if (r->need_fua_emulation) {
block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
BLOCK_ACCT_FLUSH);
r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
@@ -231,26 +259,16 @@ static void scsi_write_do_fua(SCSIDiskReq *r)
}
scsi_req_complete(&r->req, GOOD);
-
-done:
scsi_req_unref(&r->req);
}
static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
{
assert(r->req.aiocb == NULL);
-
- if (r->req.io_canceled) {
- scsi_req_cancel_complete(&r->req);
+ if (scsi_disk_req_check_error(r, ret, false)) {
goto done;
}
- if (ret < 0) {
- if (scsi_handle_rw_error(r, -ret, false)) {
- goto done;
- }
- }
-
r->sector += r->sector_count;
r->sector_count = 0;
if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
@@ -288,17 +306,10 @@ static void scsi_read_complete(void * opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- if (r->req.io_canceled) {
- scsi_req_cancel_complete(&r->req);
+ if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
- if (ret < 0) {
- if (scsi_handle_rw_error(r, -ret, true)) {
- goto done;
- }
- }
-
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->qiov.size);
@@ -315,36 +326,29 @@ done:
static void scsi_do_read(SCSIDiskReq *r, int ret)
{
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
assert (r->req.aiocb == NULL);
-
- if (r->req.io_canceled) {
- scsi_req_cancel_complete(&r->req);
+ if (scsi_disk_req_check_error(r, ret, false)) {
goto done;
}
- if (ret < 0) {
- if (scsi_handle_rw_error(r, -ret, false)) {
- goto done;
- }
- }
-
/* The request is used as the AIO opaque value, so add a ref. */
scsi_req_ref(&r->req);
if (r->req.sg) {
dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
r->req.resid -= r->req.sg->size;
- r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg,
- r->sector << BDRV_SECTOR_BITS,
- scsi_dma_complete, r);
+ r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
+ r->req.sg, r->sector << BDRV_SECTOR_BITS,
+ sdc->dma_readv, r, scsi_dma_complete, r,
+ DMA_DIRECTION_FROM_DEVICE);
} else {
scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
r->qiov.size, BLOCK_ACCT_READ);
- r->req.aiocb = blk_aio_preadv(s->qdev.conf.blk,
- r->sector << BDRV_SECTOR_BITS, &r->qiov,
- 0, scsi_read_complete, r);
+ r->req.aiocb = sdc->dma_readv(r->sector, &r->qiov,
+ scsi_read_complete, r, r);
}
done:
@@ -399,7 +403,7 @@ static void scsi_read_data(SCSIRequest *req)
first = !r->started;
r->started = true;
- if (first && scsi_is_cmd_fua(&r->req.cmd)) {
+ if (first && r->need_fua_emulation) {
block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
BLOCK_ACCT_FLUSH);
r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
@@ -456,18 +460,10 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
uint32_t n;
assert (r->req.aiocb == NULL);
-
- if (r->req.io_canceled) {
- scsi_req_cancel_complete(&r->req);
+ if (scsi_disk_req_check_error(r, ret, false)) {
goto done;
}
- if (ret < 0) {
- if (scsi_handle_rw_error(r, -ret, false)) {
- goto done;
- }
- }
-
n = r->qiov.size / 512;
r->sector += n;
r->sector_count -= n;
@@ -504,6 +500,7 @@ static void scsi_write_data(SCSIRequest *req)
{
SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
/* No data transfer may already be in progress */
assert(r->req.aiocb == NULL);
@@ -540,15 +537,15 @@ static void scsi_write_data(SCSIRequest *req)
if (r->req.sg) {
dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
r->req.resid -= r->req.sg->size;
- r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg,
- r->sector << BDRV_SECTOR_BITS,
- scsi_dma_complete, r);
+ r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
+ r->req.sg, r->sector << BDRV_SECTOR_BITS,
+ sdc->dma_writev, r, scsi_dma_complete, r,
+ DMA_DIRECTION_TO_DEVICE);
} else {
block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
r->qiov.size, BLOCK_ACCT_WRITE);
- r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
- r->sector << BDRV_SECTOR_BITS, &r->qiov,
- 0, scsi_write_complete, r);
+ r->req.aiocb = sdc->dma_writev(r->sector << BDRV_SECTOR_BITS, &r->qiov,
+ scsi_write_complete, r, r);
}
}
@@ -1600,18 +1597,10 @@ static void scsi_unmap_complete_noio(UnmapCBData *data, int ret)
uint32_t nb_sectors;
assert(r->req.aiocb == NULL);
-
- if (r->req.io_canceled) {
- scsi_req_cancel_complete(&r->req);
+ if (scsi_disk_req_check_error(r, ret, false)) {
goto done;
}
- if (ret < 0) {
- if (scsi_handle_rw_error(r, -ret, false)) {
- goto done;
- }
- }
-
if (data->count > 0) {
sector_num = ldq_be_p(&data->inbuf[0]);
nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
@@ -1711,17 +1700,10 @@ static void scsi_write_same_complete(void *opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- if (r->req.io_canceled) {
- scsi_req_cancel_complete(&r->req);
+ if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
- if (ret < 0) {
- if (scsi_handle_rw_error(r, -ret, true)) {
- goto done;
- }
- }
-
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
data->nb_sectors -= data->iov.iov_len / 512;
@@ -2138,6 +2120,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
{
SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+ SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
uint32_t len;
uint8_t command;
@@ -2196,6 +2179,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
return 0;
}
+ r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd);
if (r->sector_count == 0) {
scsi_req_complete(&r->req, GOOD);
}
@@ -2578,16 +2562,145 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp)
scsi_generic_read_device_identification(&s->qdev);
}
+typedef struct SCSIBlockReq {
+ SCSIDiskReq req;
+ sg_io_hdr_t io_header;
+
+ /* Selected bytes of the original CDB, copied into our own CDB. */
+ uint8_t cmd, cdb1, group_number;
+
+ /* CDB passed to SG_IO. */
+ uint8_t cdb[16];
+} SCSIBlockReq;
+
+static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req,
+ int64_t offset, QEMUIOVector *iov,
+ int direction,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ sg_io_hdr_t *io_header = &req->io_header;
+ SCSIDiskReq *r = &req->req;
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ int nb_logical_blocks;
+ uint64_t lba;
+ BlockAIOCB *aiocb;
+
+ /* This is not supported yet. It can only happen if the guest does
+ * reads and writes that are not aligned to one logical sectors
+ * _and_ cover multiple MemoryRegions.
+ */
+ assert(offset % s->qdev.blocksize == 0);
+ assert(iov->size % s->qdev.blocksize == 0);
+
+ io_header->interface_id = 'S';
+
+ /* The data transfer comes from the QEMUIOVector. */
+ io_header->dxfer_direction = direction;
+ io_header->dxfer_len = iov->size;
+ io_header->dxferp = (void *)iov->iov;
+ io_header->iovec_count = iov->niov;
+ assert(io_header->iovec_count == iov->niov); /* no overflow! */
+
+ /* Build a new CDB with the LBA and length patched in, in case
+ * DMA helpers split the transfer in multiple segments. Do not
+ * build a CDB smaller than what the guest wanted, and only build
+ * a larger one if strictly necessary.
+ */
+ io_header->cmdp = req->cdb;
+ lba = offset / s->qdev.blocksize;
+ nb_logical_blocks = io_header->dxfer_len / s->qdev.blocksize;
+
+ if ((req->cmd >> 5) == 0 && lba <= 0x1ffff) {
+ /* 6-byte CDB */
+ stl_be_p(&req->cdb[0], lba | (req->cmd << 24));
+ req->cdb[4] = nb_logical_blocks;
+ req->cdb[5] = 0;
+ io_header->cmd_len = 6;
+ } else if ((req->cmd >> 5) <= 1 && lba <= 0xffffffffULL) {
+ /* 10-byte CDB */
+ req->cdb[0] = (req->cmd & 0x1f) | 0x20;
+ req->cdb[1] = req->cdb1;
+ stl_be_p(&req->cdb[2], lba);
+ req->cdb[6] = req->group_number;
+ stw_be_p(&req->cdb[7], nb_logical_blocks);
+ req->cdb[9] = 0;
+ io_header->cmd_len = 10;
+ } else if ((req->cmd >> 5) != 4 && lba <= 0xffffffffULL) {
+ /* 12-byte CDB */
+ req->cdb[0] = (req->cmd & 0x1f) | 0xA0;
+ req->cdb[1] = req->cdb1;
+ stl_be_p(&req->cdb[2], lba);
+ stl_be_p(&req->cdb[6], nb_logical_blocks);
+ req->cdb[10] = req->group_number;
+ req->cdb[11] = 0;
+ io_header->cmd_len = 12;
+ } else {
+ /* 16-byte CDB */
+ req->cdb[0] = (req->cmd & 0x1f) | 0x80;
+ req->cdb[1] = req->cdb1;
+ stq_be_p(&req->cdb[2], lba);
+ stl_be_p(&req->cdb[10], nb_logical_blocks);
+ req->cdb[14] = req->group_number;
+ req->cdb[15] = 0;
+ io_header->cmd_len = 16;
+ }
+
+ /* The rest is as in scsi-generic.c. */
+ io_header->mx_sb_len = sizeof(r->req.sense);
+ io_header->sbp = r->req.sense;
+ io_header->timeout = UINT_MAX;
+ io_header->usr_ptr = r;
+ io_header->flags |= SG_FLAG_DIRECT_IO;
+
+ aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, cb, opaque);
+ assert(aiocb != NULL);
+ return aiocb;
+}
+
+static bool scsi_block_no_fua(SCSICommand *cmd)
+{
+ return false;
+}
+
+static BlockAIOCB *scsi_block_dma_readv(int64_t offset,
+ QEMUIOVector *iov,
+ BlockCompletionFunc *cb, void *cb_opaque,
+ void *opaque)
+{
+ SCSIBlockReq *r = opaque;
+ return scsi_block_do_sgio(r, offset, iov,
+ SG_DXFER_FROM_DEV, cb, cb_opaque);
+}
+
+static BlockAIOCB *scsi_block_dma_writev(int64_t offset,
+ QEMUIOVector *iov,
+ BlockCompletionFunc *cb, void *cb_opaque,
+ void *opaque)
+{
+ SCSIBlockReq *r = opaque;
+ return scsi_block_do_sgio(r, offset, iov,
+ SG_DXFER_TO_DEV, cb, cb_opaque);
+}
+
static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
{
switch (buf[0]) {
+ case VERIFY_10:
+ case VERIFY_12:
+ case VERIFY_16:
+ /* Check if BYTCHK == 0x01 (data-out buffer contains data
+ * for the number of logical blocks specified in the length
+ * field). For other modes, do not use scatter/gather operation.
+ */
+ if ((buf[1] & 6) != 2) {
+ return false;
+ }
+ break;
+
case READ_6:
case READ_10:
case READ_12:
case READ_16:
- case VERIFY_10:
- case VERIFY_12:
- case VERIFY_16:
case WRITE_6:
case WRITE_10:
case WRITE_12:
@@ -2595,21 +2708,8 @@ static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
case WRITE_VERIFY_10:
case WRITE_VERIFY_12:
case WRITE_VERIFY_16:
- /* If we are not using O_DIRECT, we might read stale data from the
- * host cache if writes were made using other commands than these
- * ones (such as WRITE SAME or EXTENDED COPY, etc.). So, without
- * O_DIRECT everything must go through SG_IO.
- */
- if (!(blk_get_flags(s->qdev.conf.blk) & BDRV_O_NOCACHE)) {
- break;
- }
-
- /* MMC writing cannot be done via pread/pwrite, because it sometimes
+ /* MMC writing cannot be done via DMA helpers, because it sometimes
* involves writing beyond the maximum LBA or to negative LBA (lead-in).
- * And once you do these writes, reading from the block device is
- * unreliable, too. It is even possible that reads deliver random data
- * from the host page cache (this is probably a Linux bug).
- *
* We might use scsi_disk_dma_reqops as long as no writing commands are
* seen, but performance usually isn't paramount on optical media. So,
* just make scsi-block operate the same as scsi-generic for them.
@@ -2627,6 +2727,54 @@ static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
}
+static int32_t scsi_block_dma_command(SCSIRequest *req, uint8_t *buf)
+{
+ SCSIBlockReq *r = (SCSIBlockReq *)req;
+ r->cmd = req->cmd.buf[0];
+ switch (r->cmd >> 5) {
+ case 0:
+ /* 6-byte CDB. */
+ r->cdb1 = r->group_number = 0;
+ break;
+ case 1:
+ /* 10-byte CDB. */
+ r->cdb1 = req->cmd.buf[1];
+ r->group_number = req->cmd.buf[6];
+ case 4:
+ /* 12-byte CDB. */
+ r->cdb1 = req->cmd.buf[1];
+ r->group_number = req->cmd.buf[10];
+ break;
+ case 5:
+ /* 16-byte CDB. */
+ r->cdb1 = req->cmd.buf[1];
+ r->group_number = req->cmd.buf[14];
+ break;
+ default:
+ abort();
+ }
+
+ if (r->cdb1 & 0xe0) {
+ /* Protection information is not supported. */
+ scsi_check_condition(&r->req, SENSE_CODE(INVALID_FIELD));
+ return 0;
+ }
+
+ r->req.status = &r->io_header.status;
+ return scsi_disk_dma_command(req, buf);
+}
+
+static const SCSIReqOps scsi_block_dma_reqops = {
+ .size = sizeof(SCSIBlockReq),
+ .free_req = scsi_free_request,
+ .send_command = scsi_block_dma_command,
+ .read_data = scsi_read_data,
+ .write_data = scsi_write_data,
+ .get_buf = scsi_get_buf,
+ .load_request = scsi_disk_load_request,
+ .save_request = scsi_disk_save_request,
+};
+
static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
uint32_t lun, uint8_t *buf,
void *hba_private)
@@ -2637,7 +2785,7 @@ static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun,
hba_private);
} else {
- return scsi_req_alloc(&scsi_disk_dma_reqops, &s->qdev, tag, lun,
+ return scsi_req_alloc(&scsi_block_dma_reqops, &s->qdev, tag, lun,
hba_private);
}
}
@@ -2656,6 +2804,46 @@ static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd,
#endif
+static
+BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov,
+ BlockCompletionFunc *cb, void *cb_opaque,
+ void *opaque)
+{
+ SCSIDiskReq *r = opaque;
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ return blk_aio_preadv(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
+}
+
+static
+BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
+ BlockCompletionFunc *cb, void *cb_opaque,
+ void *opaque)
+{
+ SCSIDiskReq *r = opaque;
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
+}
+
+static void scsi_disk_base_class_initfn(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
+
+ dc->fw_name = "disk";
+ dc->reset = scsi_disk_reset;
+ sdc->dma_readv = scsi_dma_readv;
+ sdc->dma_writev = scsi_dma_writev;
+ sdc->need_fua_emulation = scsi_is_cmd_fua;
+}
+
+static const TypeInfo scsi_disk_base_info = {
+ .name = TYPE_SCSI_DISK_BASE,
+ .parent = TYPE_SCSI_DEVICE,
+ .class_init = scsi_disk_base_class_initfn,
+ .instance_size = sizeof(SCSIDiskState),
+ .class_size = sizeof(SCSIDiskClass),
+};
+
#define DEFINE_SCSI_DISK_PROPERTIES() \
DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf), \
DEFINE_PROP_STRING("ver", SCSIDiskState, version), \
@@ -2703,17 +2891,14 @@ static void scsi_hd_class_initfn(ObjectClass *klass, void *data)
sc->realize = scsi_hd_realize;
sc->alloc_req = scsi_new_request;
sc->unit_attention_reported = scsi_disk_unit_attention_reported;
- dc->fw_name = "disk";
dc->desc = "virtual SCSI disk";
- dc->reset = scsi_disk_reset;
dc->props = scsi_hd_properties;
dc->vmsd = &vmstate_scsi_disk_state;
}
static const TypeInfo scsi_hd_info = {
.name = "scsi-hd",
- .parent = TYPE_SCSI_DEVICE,
- .instance_size = sizeof(SCSIDiskState),
+ .parent = TYPE_SCSI_DISK_BASE,
.class_init = scsi_hd_class_initfn,
};
@@ -2735,17 +2920,14 @@ static void scsi_cd_class_initfn(ObjectClass *klass, void *data)
sc->realize = scsi_cd_realize;
sc->alloc_req = scsi_new_request;
sc->unit_attention_reported = scsi_disk_unit_attention_reported;
- dc->fw_name = "disk";
dc->desc = "virtual SCSI CD-ROM";
- dc->reset = scsi_disk_reset;
dc->props = scsi_cd_properties;
dc->vmsd = &vmstate_scsi_disk_state;
}
static const TypeInfo scsi_cd_info = {
.name = "scsi-cd",
- .parent = TYPE_SCSI_DEVICE,
- .instance_size = sizeof(SCSIDiskState),
+ .parent = TYPE_SCSI_DISK_BASE,
.class_init = scsi_cd_class_initfn,
};
@@ -2759,21 +2941,22 @@ static void scsi_block_class_initfn(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
+ SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
sc->realize = scsi_block_realize;
sc->alloc_req = scsi_block_new_request;
sc->parse_cdb = scsi_block_parse_cdb;
- dc->fw_name = "disk";
+ sdc->dma_readv = scsi_block_dma_readv;
+ sdc->dma_writev = scsi_block_dma_writev;
+ sdc->need_fua_emulation = scsi_block_no_fua;
dc->desc = "SCSI block device passthrough";
- dc->reset = scsi_disk_reset;
dc->props = scsi_block_properties;
dc->vmsd = &vmstate_scsi_disk_state;
}
static const TypeInfo scsi_block_info = {
.name = "scsi-block",
- .parent = TYPE_SCSI_DEVICE,
- .instance_size = sizeof(SCSIDiskState),
+ .parent = TYPE_SCSI_DISK_BASE,
.class_init = scsi_block_class_initfn,
};
#endif
@@ -2811,13 +2994,13 @@ static void scsi_disk_class_initfn(ObjectClass *klass, void *data)
static const TypeInfo scsi_disk_info = {
.name = "scsi-disk",
- .parent = TYPE_SCSI_DEVICE,
- .instance_size = sizeof(SCSIDiskState),
+ .parent = TYPE_SCSI_DISK_BASE,
.class_init = scsi_disk_class_initfn,
};
static void scsi_disk_register_types(void)
{
+ type_register_static(&scsi_disk_base_info);
type_register_static(&scsi_hd_info);
type_register_static(&scsi_cd_info);
#ifdef __linux__
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 7459465f60..71372a8383 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -222,6 +222,18 @@ static void scsi_read_complete(void * opaque, int ret)
r->buf[3] |= 0x80;
}
}
+ if (s->type == TYPE_DISK &&
+ r->req.cmd.buf[0] == INQUIRY &&
+ r->req.cmd.buf[2] == 0xb0) {
+ uint32_t max_xfer_len = blk_get_max_transfer_length(s->conf.blk);
+ if (max_xfer_len) {
+ stl_be_p(&r->buf[8], max_xfer_len);
+ /* Also take care of the opt xfer len. */
+ if (ldl_be_p(&r->buf[12]) > max_xfer_len) {
+ stl_be_p(&r->buf[12], max_xfer_len);
+ }
+ }
+ }
scsi_req_data(&r->req, len);
scsi_req_unref(&r->req);
}
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index f67b5bf7d3..2d7528d1dd 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -153,7 +153,7 @@ pvscsi_log2(uint32_t input)
return log;
}
-static void
+static int
pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri)
{
int i;
@@ -161,6 +161,10 @@ pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri)
uint32_t req_ring_size, cmp_ring_size;
m->rs_pa = ri->ringsStatePPN << VMW_PAGE_SHIFT;
+ if ((ri->reqRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)
+ || (ri->cmpRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)) {
+ return -1;
+ }
req_ring_size = ri->reqRingNumPages * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
cmp_ring_size = ri->cmpRingNumPages * PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
txr_len_log2 = pvscsi_log2(req_ring_size - 1);
@@ -192,15 +196,20 @@ pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri)
/* Flush ring state page changes */
smp_wmb();
+
+ return 0;
}
-static void
+static int
pvscsi_ring_init_msg(PVSCSIRingInfo *m, PVSCSICmdDescSetupMsgRing *ri)
{
int i;
uint32_t len_log2;
uint32_t ring_size;
+ if (ri->numPages > PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES) {
+ return -1;
+ }
ring_size = ri->numPages * PVSCSI_MAX_NUM_MSG_ENTRIES_PER_PAGE;
len_log2 = pvscsi_log2(ring_size - 1);
@@ -220,6 +229,8 @@ pvscsi_ring_init_msg(PVSCSIRingInfo *m, PVSCSICmdDescSetupMsgRing *ri)
/* Flush ring state page changes */
smp_wmb();
+
+ return 0;
}
static void
@@ -770,7 +781,10 @@ pvscsi_on_cmd_setup_rings(PVSCSIState *s)
trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_SETUP_RINGS");
pvscsi_dbg_dump_tx_rings_config(rc);
- pvscsi_ring_init_data(&s->rings, rc);
+ if (pvscsi_ring_init_data(&s->rings, rc) < 0) {
+ return PVSCSI_COMMAND_PROCESSING_FAILED;
+ }
+
s->rings_info_valid = TRUE;
return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
}
@@ -850,7 +864,9 @@ pvscsi_on_cmd_setup_msg_ring(PVSCSIState *s)
}
if (s->rings_info_valid) {
- pvscsi_ring_init_msg(&s->rings, rc);
+ if (pvscsi_ring_init_msg(&s->rings, rc) < 0) {
+ return PVSCSI_COMMAND_PROCESSING_FAILED;
+ }
s->msg_ring_info_valid = TRUE;
}
return sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t);
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 5914e85107..495e09fd4e 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -17,7 +17,6 @@
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
-#include "exec/ram_addr.h"
#include "migration/migration.h"
#include <sys/ioctl.h>
@@ -247,18 +246,18 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
for (i = 0; i < dev->mem->nregions; ++i) {
struct vhost_memory_region *reg = dev->mem->regions + i;
- ram_addr_t ram_addr;
+ ram_addr_t offset;
+ MemoryRegion *mr;
assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
- qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr,
- &ram_addr);
- fd = qemu_get_ram_fd(ram_addr);
+ mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
+ &offset);
+ fd = memory_region_get_fd(mr);
if (fd > 0) {
msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
- msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
- (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
+ msg.payload.memory.regions[fd_num].mmap_offset = offset;
assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
fds[fd_num++] = fd;
}
@@ -616,17 +615,15 @@ static bool vhost_user_can_merge(struct vhost_dev *dev,
uint64_t start1, uint64_t size1,
uint64_t start2, uint64_t size2)
{
- ram_addr_t ram_addr;
+ ram_addr_t offset;
int mfd, rfd;
MemoryRegion *mr;
- mr = qemu_ram_addr_from_host((void *)(uintptr_t)start1, &ram_addr);
- assert(mr);
- mfd = qemu_get_ram_fd(ram_addr);
+ mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
+ mfd = memory_region_get_fd(mr);
- mr = qemu_ram_addr_from_host((void *)(uintptr_t)start2, &ram_addr);
- assert(mr);
- rfd = qemu_get_ram_fd(ram_addr);
+ mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
+ rfd = memory_region_get_fd(mr);
return mfd == rfd;
}
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index a2c3b92742..aaee995634 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -57,10 +57,10 @@ typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
/* This should not be used by devices. */
-MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
+ram_addr_t qemu_ram_addr_from_host(void *ptr);
RAMBlock *qemu_ram_block_by_name(const char *name);
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
- ram_addr_t *ram_addr, ram_addr_t *offset);
+ ram_addr_t *offset);
void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev);
void qemu_ram_unset_idstr(RAMBlock *block);
const char *qemu_ram_get_idstr(RAMBlock *rb);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index f649697ee9..4ab680052f 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -32,6 +32,8 @@
#include "qom/object.h"
#include "qemu/rcu.h"
+#define RAM_ADDR_INVALID (~(ram_addr_t)0)
+
#define MAX_PHYS_ADDR_SPACE_BITS 62
#define MAX_PHYS_ADDR (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1)
@@ -667,6 +669,35 @@ static inline bool memory_region_is_rom(MemoryRegion *mr)
int memory_region_get_fd(MemoryRegion *mr);
/**
+ * memory_region_set_fd: Mark a RAM memory region as backed by a
+ * file descriptor.
+ *
+ * This function is typically used after memory_region_init_ram_ptr().
+ *
+ * @mr: the memory region being updated.
+ * @fd: the file descriptor that backs @mr.
+ */
+void memory_region_set_fd(MemoryRegion *mr, int fd);
+
+/**
+ * memory_region_from_host: Convert a pointer into a RAM memory region
+ * and an offset within it.
+ *
+ * Given a host pointer inside a RAM memory region (created with
+ * memory_region_init_ram() or memory_region_init_ram_ptr()), return
+ * the MemoryRegion and the offset within it.
+ *
+ * Use with care; by the time this function returns, the returned pointer is
+ * not protected by RCU anymore. If the caller is not within an RCU critical
+ * section and does not hold the iothread lock, it must have other means of
+ * protecting the pointer, such as a reference to the region that includes
+ * the incoming host pointer.
+ *
+ * @mr: the memory region being queried.
+ */
+MemoryRegion *memory_region_from_host(void *ptr, ram_addr_t *offset);
+
+/**
* memory_region_get_ram_ptr: Get a pointer into a RAM memory region.
*
* Returns a host pointer to a RAM memory region (created with
@@ -1362,7 +1393,7 @@ MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
MemoryRegion *mr);
MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
MemTxAttrs attrs, uint8_t *buf, int len);
-void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr);
+void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr);
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
@@ -1400,8 +1431,7 @@ MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
l = len;
mr = address_space_translate(as, addr, &addr1, &l, false);
if (len == l && memory_access_is_direct(mr, false)) {
- addr1 += memory_region_get_ram_addr(mr);
- ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
memcpy(buf, ptr, len);
} else {
result = address_space_read_continue(as, addr, attrs, buf, len,
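
Editor's note: a short usage sketch for the memory_region_from_host()/memory_region_get_fd() pair documented above, mirroring the vhost-user call site earlier in this series. It builds only inside the QEMU tree (exec/memory.h, qemu/rcu.h); the helper name is illustrative, not part of the patch.

    /* Map a host pointer back to its RAM MemoryRegion and fetch the
     * backing file descriptor; -1 means the pointer is not guest RAM. */
    static int fd_for_host_pointer(void *host_ptr, ram_addr_t *offset)
    {
        MemoryRegion *mr;
        int fd;

        rcu_read_lock();                 /* see the RCU warning above */
        mr = memory_region_from_host(host_ptr, offset);
        fd = mr ? memory_region_get_fd(mr) : -1;
        rcu_read_unlock();
        /* *offset is now the mmap offset within the region's backing file */
        return fd;
    }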
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 5b6e1b8b86..2a9465da11 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -105,9 +105,6 @@ RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
uint64_t length,
void *host),
MemoryRegion *mr, Error **errp);
-int qemu_get_ram_fd(ram_addr_t addr);
-void qemu_set_ram_fd(ram_addr_t addr, int fd);
-void *qemu_get_ram_block_host_ptr(ram_addr_t addr);
void qemu_ram_free(RAMBlock *block);
int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);
diff --git a/include/hw/cris/etraxfs.h b/include/hw/cris/etraxfs.h
index 73a6134c1e..eb664181e7 100644
--- a/include/hw/cris/etraxfs.h
+++ b/include/hw/cris/etraxfs.h
@@ -46,4 +46,20 @@ etraxfs_eth_init(NICInfo *nd, hwaddr base, int phyaddr,
return dev;
}
+static inline DeviceState *etraxfs_ser_create(hwaddr addr,
+ qemu_irq irq,
+ CharDriverState *chr)
+{
+ DeviceState *dev;
+ SysBusDevice *s;
+
+ dev = qdev_create(NULL, "etraxfs,serial");
+ s = SYS_BUS_DEVICE(dev);
+ qdev_prop_set_chr(dev, "chardev", chr);
+ qdev_init_nofail(dev);
+ sysbus_mmio_map(s, 0, addr);
+ sysbus_connect_irq(s, 0, irq);
+ return dev;
+}
+
#endif
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 5bc4d6cc47..7a590969b5 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -36,7 +36,18 @@
#define smp_wmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); barrier(); })
#define smp_rmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); barrier(); })
+/* Most compilers currently treat consume and acquire the same, but really
+ * no processors except Alpha need a barrier here. Leave it in if
+ * using Thread Sanitizer to avoid warnings, otherwise optimize it away.
+ */
+#if defined(__SANITIZE_THREAD__)
#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); barrier(); })
+#elif defined(__alpha__)
+#define smp_read_barrier_depends() asm volatile("mb":::"memory")
+#else
+#define smp_read_barrier_depends() barrier()
+#endif
+
/* Weak atomic operations prevent the compiler moving other
* loads/stores past the atomic operation load/store. However there is
@@ -56,13 +67,23 @@
__atomic_store(ptr, &_val, __ATOMIC_RELAXED); \
} while(0)
-/* Atomic RCU operations imply weak memory barriers */
+/* See above: most compilers currently treat consume and acquire the
+ * same, but this slows down atomic_rcu_read unnecessarily.
+ */
+#ifdef __SANITIZE_THREAD__
+#define atomic_rcu_read__nocheck(ptr, valptr) \
+ __atomic_load(ptr, valptr, __ATOMIC_CONSUME);
+#else
+#define atomic_rcu_read__nocheck(ptr, valptr) \
+ __atomic_load(ptr, valptr, __ATOMIC_RELAXED); \
+ smp_read_barrier_depends();
+#endif
#define atomic_rcu_read(ptr) \
({ \
QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
typeof(*ptr) _val; \
- __atomic_load(ptr, &_val, __ATOMIC_CONSUME); \
+ atomic_rcu_read__nocheck(ptr, &_val); \
_val; \
})
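
Editor's note: the atomic.h changes above make the common RCU read pattern rely on dependency ordering, so no acquire barrier is emitted except on Alpha or under Thread Sanitizer. A sketch of the intended usage follows; it assumes qemu/atomic.h and qemu/rcu.h, and everything other than atomic_rcu_read/atomic_rcu_set is illustrative.

    struct Config {
        int mtu;
    };

    static struct Config *current_config;     /* RCU-protected pointer */

    static int reader_get_mtu(void)
    {
        /* Inside an rcu_read_lock() section: the load of 'c' orders the
         * dependent load of c->mtu, so only Alpha/TSan pay for a barrier. */
        struct Config *c = atomic_rcu_read(&current_config);
        return c ? c->mtu : -1;
    }

    static void updater_set_mtu(struct Config *newcfg)
    {
        /* atomic_rcu_set pairs with atomic_rcu_read: the release store
         * publishes newcfg's contents before the pointer itself. */
        atomic_rcu_set(&current_config, newcfg);
        /* the old value must be reclaimed via synchronize_rcu()/call_rcu() */
    }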
diff --git a/memory.c b/memory.c
index 4e3cda8a12..8ba496dc7b 100644
--- a/memory.c
+++ b/memory.c
@@ -33,8 +33,6 @@
//#define DEBUG_UNASSIGNED
-#define RAM_ADDR_INVALID (~(ram_addr_t)0)
-
static unsigned memory_region_transaction_depth;
static bool memory_region_update_pending;
static bool ioeventfd_update_pending;
@@ -227,6 +225,7 @@ struct FlatRange {
hwaddr offset_in_region;
AddrRange addr;
uint8_t dirty_log_mask;
+ bool romd_mode;
bool readonly;
};
@@ -251,6 +250,7 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b)
return a->mr == b->mr
&& addrrange_equal(a->addr, b->addr)
&& a->offset_in_region == b->offset_in_region
+ && a->romd_mode == b->romd_mode
&& a->readonly == b->readonly;
}
@@ -310,6 +310,7 @@ static bool can_merge(FlatRange *r1, FlatRange *r2)
r1->addr.size),
int128_make64(r2->offset_in_region))
&& r1->dirty_log_mask == r2->dirty_log_mask
+ && r1->romd_mode == r2->romd_mode
&& r1->readonly == r2->readonly;
}
@@ -663,6 +664,7 @@ static void render_memory_region(FlatView *view,
fr.mr = mr;
fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
+ fr.romd_mode = mr->romd_mode;
fr.readonly = readonly;
/* Render the region itself into any gaps left by the current view. */
@@ -1622,13 +1624,26 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
int memory_region_get_fd(MemoryRegion *mr)
{
- if (mr->alias) {
- return memory_region_get_fd(mr->alias);
+ int fd;
+
+ rcu_read_lock();
+ while (mr->alias) {
+ mr = mr->alias;
}
+ fd = mr->ram_block->fd;
+ rcu_read_unlock();
- assert(mr->ram_block);
+ return fd;
+}
- return qemu_get_ram_fd(memory_region_get_ram_addr(mr));
+void memory_region_set_fd(MemoryRegion *mr, int fd)
+{
+ rcu_read_lock();
+ while (mr->alias) {
+ mr = mr->alias;
+ }
+ mr->ram_block->fd = fd;
+ rcu_read_unlock();
}
void *memory_region_get_ram_ptr(MemoryRegion *mr)
@@ -1642,10 +1657,22 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr)
mr = mr->alias;
}
assert(mr->ram_block);
- ptr = qemu_get_ram_ptr(mr->ram_block, memory_region_get_ram_addr(mr));
+ ptr = qemu_map_ram_ptr(mr->ram_block, offset);
rcu_read_unlock();
- return ptr + offset;
+ return ptr;
+}
+
+MemoryRegion *memory_region_from_host(void *ptr, ram_addr_t *offset)
+{
+ RAMBlock *block;
+
+ block = qemu_ram_block_from_host(ptr, false, offset);
+ if (!block) {
+ return NULL;
+ }
+
+ return block->mr;
}
ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr)
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index fbd0064fce..cf7dcd25d4 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -407,7 +407,6 @@ static void *postcopy_ram_fault_thread(void *opaque)
while (true) {
ram_addr_t rb_offset;
- ram_addr_t in_raspace;
struct pollfd pfd[2];
/*
@@ -459,7 +458,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
rb = qemu_ram_block_from_host(
(void *)(uintptr_t)msg.arg.pagefault.address,
- true, &in_raspace, &rb_offset);
+ true, &rb_offset);
if (!rb) {
error_report("postcopy_ram_fault_thread: Fault outside guest: %"
PRIx64, (uint64_t)msg.arg.pagefault.address);
diff --git a/nbd/server.c b/nbd/server.c
index fa862cd622..b2cfeb9843 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1153,12 +1153,20 @@ static void nbd_trip(void *opaque)
break;
case NBD_CMD_TRIM:
TRACE("Request type is TRIM");
- ret = blk_co_discard(exp->blk, (request.from + exp->dev_offset)
- / BDRV_SECTOR_SIZE,
- request.len / BDRV_SECTOR_SIZE);
- if (ret < 0) {
- LOG("discard failed");
- reply.error = -ret;
+ /* Ignore unaligned head or tail, until block layer adds byte
+ * interface */
+ if (request.len >= BDRV_SECTOR_SIZE) {
+ request.len -= (request.from + request.len) % BDRV_SECTOR_SIZE;
+ ret = blk_co_discard(exp->blk,
+ DIV_ROUND_UP(request.from + exp->dev_offset,
+ BDRV_SECTOR_SIZE),
+ request.len / BDRV_SECTOR_SIZE);
+ if (ret < 0) {
+ LOG("discard failed");
+ reply.error = -ret;
+ }
+ } else {
+ TRACE("trim request too small, ignoring");
}
if (nbd_co_send_reply(req, &reply, 0) < 0) {
goto out;
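
Editor's note: the TRIM arithmetic above silently drops the unaligned head and tail of a byte-granularity request. A self-contained worked example with BDRV_SECTOR_SIZE = 512 and exp->dev_offset assumed 0; the numbers are chosen purely for illustration.

    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR 512
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        uint64_t from = 700, len = 2000;      /* byte-granular TRIM request */

        len -= (from + len) % SECTOR;         /* drop unaligned tail: 2700 % 512 = 140 */
        uint64_t first_sector = DIV_ROUND_UP(from, SECTOR);   /* 2 -> byte 1024 */
        uint64_t nb_sectors   = len / SECTOR;                 /* 1860 / 512 = 3 */

        /* Discards sectors 2..4 (bytes 1024..2559), strictly inside the
         * requested range 700..2699; the ragged ends stay untouched. */
        printf("discard %llu sectors starting at sector %llu\n",
               (unsigned long long)nb_sectors,
               (unsigned long long)first_sector);
        return 0;
    }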
diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py
index eb24f7874b..9956fc036c 100644
--- a/scripts/dump-guest-memory.py
+++ b/scripts/dump-guest-memory.py
@@ -328,23 +328,10 @@ def qlist_foreach(head, field_str):
yield var
-def qemu_get_ram_block(ram_addr):
- """Returns the RAMBlock struct to which the given address belongs."""
-
- ram_blocks = gdb.parse_and_eval("ram_list.blocks")
-
- for block in qlist_foreach(ram_blocks, "next"):
- if (ram_addr - block["offset"]) < block["used_length"]:
- return block
-
- raise gdb.GdbError("Bad ram offset %x" % ram_addr)
-
-
-def qemu_get_ram_ptr(ram_addr):
+def qemu_map_ram_ptr(block, offset):
"""Returns qemu vaddr for given guest physical address."""
- block = qemu_get_ram_block(ram_addr)
- return block["host"] + (ram_addr - block["offset"])
+ return block["host"] + offset
def memory_region_get_ram_ptr(memory_region):
@@ -352,7 +339,7 @@ def memory_region_get_ram_ptr(memory_region):
return (memory_region_get_ram_ptr(memory_region["alias"].dereference())
+ memory_region["alias_offset"])
- return qemu_get_ram_ptr(memory_region["ram_block"]["offset"])
+ return qemu_map_ram_ptr(memory_region["ram_block"], 0)
def get_guest_phys_blocks():
diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat
deleted file mode 100755
index 769d884b6d..0000000000
--- a/scripts/kvm/kvm_stat
+++ /dev/null
@@ -1,825 +0,0 @@
-#!/usr/bin/python
-#
-# top-like utility for displaying kvm statistics
-#
-# Copyright 2006-2008 Qumranet Technologies
-# Copyright 2008-2011 Red Hat, Inc.
-#
-# Authors:
-# Avi Kivity <avi@redhat.com>
-#
-# This work is licensed under the terms of the GNU GPL, version 2. See
-# the COPYING file in the top-level directory.
-
-import curses
-import sys
-import os
-import time
-import optparse
-import ctypes
-import fcntl
-import resource
-import struct
-import re
-from collections import defaultdict
-from time import sleep
-
-VMX_EXIT_REASONS = {
- 'EXCEPTION_NMI': 0,
- 'EXTERNAL_INTERRUPT': 1,
- 'TRIPLE_FAULT': 2,
- 'PENDING_INTERRUPT': 7,
- 'NMI_WINDOW': 8,
- 'TASK_SWITCH': 9,
- 'CPUID': 10,
- 'HLT': 12,
- 'INVLPG': 14,
- 'RDPMC': 15,
- 'RDTSC': 16,
- 'VMCALL': 18,
- 'VMCLEAR': 19,
- 'VMLAUNCH': 20,
- 'VMPTRLD': 21,
- 'VMPTRST': 22,
- 'VMREAD': 23,
- 'VMRESUME': 24,
- 'VMWRITE': 25,
- 'VMOFF': 26,
- 'VMON': 27,
- 'CR_ACCESS': 28,
- 'DR_ACCESS': 29,
- 'IO_INSTRUCTION': 30,
- 'MSR_READ': 31,
- 'MSR_WRITE': 32,
- 'INVALID_STATE': 33,
- 'MWAIT_INSTRUCTION': 36,
- 'MONITOR_INSTRUCTION': 39,
- 'PAUSE_INSTRUCTION': 40,
- 'MCE_DURING_VMENTRY': 41,
- 'TPR_BELOW_THRESHOLD': 43,
- 'APIC_ACCESS': 44,
- 'EPT_VIOLATION': 48,
- 'EPT_MISCONFIG': 49,
- 'WBINVD': 54,
- 'XSETBV': 55,
- 'APIC_WRITE': 56,
- 'INVPCID': 58,
-}
-
-SVM_EXIT_REASONS = {
- 'READ_CR0': 0x000,
- 'READ_CR3': 0x003,
- 'READ_CR4': 0x004,
- 'READ_CR8': 0x008,
- 'WRITE_CR0': 0x010,
- 'WRITE_CR3': 0x013,
- 'WRITE_CR4': 0x014,
- 'WRITE_CR8': 0x018,
- 'READ_DR0': 0x020,
- 'READ_DR1': 0x021,
- 'READ_DR2': 0x022,
- 'READ_DR3': 0x023,
- 'READ_DR4': 0x024,
- 'READ_DR5': 0x025,
- 'READ_DR6': 0x026,
- 'READ_DR7': 0x027,
- 'WRITE_DR0': 0x030,
- 'WRITE_DR1': 0x031,
- 'WRITE_DR2': 0x032,
- 'WRITE_DR3': 0x033,
- 'WRITE_DR4': 0x034,
- 'WRITE_DR5': 0x035,
- 'WRITE_DR6': 0x036,
- 'WRITE_DR7': 0x037,
- 'EXCP_BASE': 0x040,
- 'INTR': 0x060,
- 'NMI': 0x061,
- 'SMI': 0x062,
- 'INIT': 0x063,
- 'VINTR': 0x064,
- 'CR0_SEL_WRITE': 0x065,
- 'IDTR_READ': 0x066,
- 'GDTR_READ': 0x067,
- 'LDTR_READ': 0x068,
- 'TR_READ': 0x069,
- 'IDTR_WRITE': 0x06a,
- 'GDTR_WRITE': 0x06b,
- 'LDTR_WRITE': 0x06c,
- 'TR_WRITE': 0x06d,
- 'RDTSC': 0x06e,
- 'RDPMC': 0x06f,
- 'PUSHF': 0x070,
- 'POPF': 0x071,
- 'CPUID': 0x072,
- 'RSM': 0x073,
- 'IRET': 0x074,
- 'SWINT': 0x075,
- 'INVD': 0x076,
- 'PAUSE': 0x077,
- 'HLT': 0x078,
- 'INVLPG': 0x079,
- 'INVLPGA': 0x07a,
- 'IOIO': 0x07b,
- 'MSR': 0x07c,
- 'TASK_SWITCH': 0x07d,
- 'FERR_FREEZE': 0x07e,
- 'SHUTDOWN': 0x07f,
- 'VMRUN': 0x080,
- 'VMMCALL': 0x081,
- 'VMLOAD': 0x082,
- 'VMSAVE': 0x083,
- 'STGI': 0x084,
- 'CLGI': 0x085,
- 'SKINIT': 0x086,
- 'RDTSCP': 0x087,
- 'ICEBP': 0x088,
- 'WBINVD': 0x089,
- 'MONITOR': 0x08a,
- 'MWAIT': 0x08b,
- 'MWAIT_COND': 0x08c,
- 'XSETBV': 0x08d,
- 'NPF': 0x400,
-}
-
-# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
-AARCH64_EXIT_REASONS = {
- 'UNKNOWN': 0x00,
- 'WFI': 0x01,
- 'CP15_32': 0x03,
- 'CP15_64': 0x04,
- 'CP14_MR': 0x05,
- 'CP14_LS': 0x06,
- 'FP_ASIMD': 0x07,
- 'CP10_ID': 0x08,
- 'CP14_64': 0x0C,
- 'ILL_ISS': 0x0E,
- 'SVC32': 0x11,
- 'HVC32': 0x12,
- 'SMC32': 0x13,
- 'SVC64': 0x15,
- 'HVC64': 0x16,
- 'SMC64': 0x17,
- 'SYS64': 0x18,
- 'IABT': 0x20,
- 'IABT_HYP': 0x21,
- 'PC_ALIGN': 0x22,
- 'DABT': 0x24,
- 'DABT_HYP': 0x25,
- 'SP_ALIGN': 0x26,
- 'FP_EXC32': 0x28,
- 'FP_EXC64': 0x2C,
- 'SERROR': 0x2F,
- 'BREAKPT': 0x30,
- 'BREAKPT_HYP': 0x31,
- 'SOFTSTP': 0x32,
- 'SOFTSTP_HYP': 0x33,
- 'WATCHPT': 0x34,
- 'WATCHPT_HYP': 0x35,
- 'BKPT32': 0x38,
- 'VECTOR32': 0x3A,
- 'BRK64': 0x3C,
-}
-
-# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
-USERSPACE_EXIT_REASONS = {
- 'UNKNOWN': 0,
- 'EXCEPTION': 1,
- 'IO': 2,
- 'HYPERCALL': 3,
- 'DEBUG': 4,
- 'HLT': 5,
- 'MMIO': 6,
- 'IRQ_WINDOW_OPEN': 7,
- 'SHUTDOWN': 8,
- 'FAIL_ENTRY': 9,
- 'INTR': 10,
- 'SET_TPR': 11,
- 'TPR_ACCESS': 12,
- 'S390_SIEIC': 13,
- 'S390_RESET': 14,
- 'DCR': 15,
- 'NMI': 16,
- 'INTERNAL_ERROR': 17,
- 'OSI': 18,
- 'PAPR_HCALL': 19,
- 'S390_UCONTROL': 20,
- 'WATCHDOG': 21,
- 'S390_TSCH': 22,
- 'EPR': 23,
- 'SYSTEM_EVENT': 24,
-}
-
-IOCTL_NUMBERS = {
- 'SET_FILTER': 0x40082406,
- 'ENABLE': 0x00002400,
- 'DISABLE': 0x00002401,
- 'RESET': 0x00002403,
-}
-
-class Arch(object):
- """Class that encapsulates global architecture specific data like
- syscall and ioctl numbers.
-
- """
- @staticmethod
- def get_arch():
- machine = os.uname()[4]
-
- if machine.startswith('ppc'):
- return ArchPPC()
- elif machine.startswith('aarch64'):
- return ArchA64()
- elif machine.startswith('s390'):
- return ArchS390()
- else:
- # X86_64
- for line in open('/proc/cpuinfo'):
- if not line.startswith('flags'):
- continue
-
- flags = line.split()
- if 'vmx' in flags:
- return ArchX86(VMX_EXIT_REASONS)
- if 'svm' in flags:
- return ArchX86(SVM_EXIT_REASONS)
- return
-
-class ArchX86(Arch):
- def __init__(self, exit_reasons):
- self.sc_perf_evt_open = 298
- self.ioctl_numbers = IOCTL_NUMBERS
- self.exit_reasons = exit_reasons
-
-class ArchPPC(Arch):
- def __init__(self):
- self.sc_perf_evt_open = 319
- self.ioctl_numbers = IOCTL_NUMBERS
- self.ioctl_numbers['ENABLE'] = 0x20002400
- self.ioctl_numbers['DISABLE'] = 0x20002401
-
- # PPC comes in 32 and 64 bit and some generated ioctl
- # numbers depend on the wordsize.
- char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
- self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
-
-class ArchA64(Arch):
- def __init__(self):
- self.sc_perf_evt_open = 241
- self.ioctl_numbers = IOCTL_NUMBERS
- self.exit_reasons = AARCH64_EXIT_REASONS
-
-class ArchS390(Arch):
- def __init__(self):
- self.sc_perf_evt_open = 331
- self.ioctl_numbers = IOCTL_NUMBERS
- self.exit_reasons = None
-
-ARCH = Arch.get_arch()
-
-
-def walkdir(path):
- """Returns os.walk() data for specified directory.
-
- As it is only a wrapper it returns the same 3-tuple of (dirpath,
- dirnames, filenames).
- """
- return next(os.walk(path))
-
-
-def parse_int_list(list_string):
- """Returns an int list from a string of comma separated integers and
- integer ranges."""
- integers = []
- members = list_string.split(',')
-
- for member in members:
- if '-' not in member:
- integers.append(int(member))
- else:
- int_range = member.split('-')
- integers.extend(range(int(int_range[0]),
- int(int_range[1]) + 1))
-
- return integers
-
-
-def get_online_cpus():
- with open('/sys/devices/system/cpu/online') as cpu_list:
- cpu_string = cpu_list.readline()
- return parse_int_list(cpu_string)
-
-
-def get_filters():
- filters = {}
- filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
- if ARCH.exit_reasons:
- filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
- return filters
-
-libc = ctypes.CDLL('libc.so.6', use_errno=True)
-syscall = libc.syscall
-
-class perf_event_attr(ctypes.Structure):
- _fields_ = [('type', ctypes.c_uint32),
- ('size', ctypes.c_uint32),
- ('config', ctypes.c_uint64),
- ('sample_freq', ctypes.c_uint64),
- ('sample_type', ctypes.c_uint64),
- ('read_format', ctypes.c_uint64),
- ('flags', ctypes.c_uint64),
- ('wakeup_events', ctypes.c_uint32),
- ('bp_type', ctypes.c_uint32),
- ('bp_addr', ctypes.c_uint64),
- ('bp_len', ctypes.c_uint64),
- ]
-
- def __init__(self):
- super(self.__class__, self).__init__()
- self.type = PERF_TYPE_TRACEPOINT
- self.size = ctypes.sizeof(self)
- self.read_format = PERF_FORMAT_GROUP
-
-def perf_event_open(attr, pid, cpu, group_fd, flags):
- return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
- ctypes.c_int(pid), ctypes.c_int(cpu),
- ctypes.c_int(group_fd), ctypes.c_long(flags))
-
-PERF_TYPE_TRACEPOINT = 2
-PERF_FORMAT_GROUP = 1 << 3
-
-PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
-PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
-
-class Group(object):
- def __init__(self):
- self.events = []
-
- def add_event(self, event):
- self.events.append(event)
-
- def read(self):
- length = 8 * (1 + len(self.events))
- read_format = 'xxxxxxxx' + 'Q' * len(self.events)
- return dict(zip([event.name for event in self.events],
- struct.unpack(read_format,
- os.read(self.events[0].fd, length))))
-
-class Event(object):
- def __init__(self, name, group, trace_cpu, trace_point, trace_filter,
- trace_set='kvm'):
- self.name = name
- self.fd = None
- self.setup_event(group, trace_cpu, trace_point, trace_filter,
- trace_set)
-
- def setup_event_attribute(self, trace_set, trace_point):
- id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
- trace_point, 'id')
-
- event_attr = perf_event_attr()
- event_attr.config = int(open(id_path).read())
- return event_attr
-
- def setup_event(self, group, trace_cpu, trace_point, trace_filter,
- trace_set):
- event_attr = self.setup_event_attribute(trace_set, trace_point)
-
- group_leader = -1
- if group.events:
- group_leader = group.events[0].fd
-
- fd = perf_event_open(event_attr, -1, trace_cpu,
- group_leader, 0)
- if fd == -1:
- err = ctypes.get_errno()
- raise OSError(err, os.strerror(err),
- 'while calling sys_perf_event_open().')
-
- if trace_filter:
- fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
- trace_filter)
-
- self.fd = fd
-
- def enable(self):
- fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)
-
- def disable(self):
- fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)
-
- def reset(self):
- fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
-
-class TracepointProvider(object):
- def __init__(self):
- self.group_leaders = []
- self.filters = get_filters()
- self._fields = self.get_available_fields()
- self.setup_traces()
- self.fields = self._fields
-
- def get_available_fields(self):
- path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
- fields = walkdir(path)[1]
- extra = []
- for field in fields:
- if field in self.filters:
- filter_name_, filter_dicts = self.filters[field]
- for name in filter_dicts:
- extra.append(field + '(' + name + ')')
- fields += extra
- return fields
-
- def setup_traces(self):
- cpus = get_online_cpus()
-
- # The constant is needed as a buffer for python libs, std
- # streams and other files that the script opens.
- newlim = len(cpus) * len(self._fields) + 50
- try:
- softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
-
- if hardlim < newlim:
- # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
- resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
- else:
- # Raising the soft limit is sufficient.
- resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))
-
- except ValueError:
- sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))
-
- for cpu in cpus:
- group = Group()
- for name in self._fields:
- tracepoint = name
- tracefilter = None
- match = re.match(r'(.*)\((.*)\)', name)
- if match:
- tracepoint, sub = match.groups()
- tracefilter = ('%s==%d\0' %
- (self.filters[tracepoint][0],
- self.filters[tracepoint][1][sub]))
-
- group.add_event(Event(name=name,
- group=group,
- trace_cpu=cpu,
- trace_point=tracepoint,
- trace_filter=tracefilter))
- self.group_leaders.append(group)
-
- def available_fields(self):
- return self.get_available_fields()
-
- @property
- def fields(self):
- return self._fields
-
- @fields.setter
- def fields(self, fields):
- self._fields = fields
- for group in self.group_leaders:
- for index, event in enumerate(group.events):
- if event.name in fields:
- event.reset()
- event.enable()
- else:
- # Do not disable the group leader.
- # It would disable all of its events.
- if index != 0:
- event.disable()
-
- def read(self):
- ret = defaultdict(int)
- for group in self.group_leaders:
- for name, val in group.read().iteritems():
- if name in self._fields:
- ret[name] += val
- return ret
-
-class DebugfsProvider(object):
- def __init__(self):
- self._fields = self.get_available_fields()
-
- def get_available_fields(self):
- return walkdir(PATH_DEBUGFS_KVM)[2]
-
- @property
- def fields(self):
- return self._fields
-
- @fields.setter
- def fields(self, fields):
- self._fields = fields
-
- def read(self):
- def val(key):
- return int(file(PATH_DEBUGFS_KVM + '/' + key).read())
- return dict([(key, val(key)) for key in self._fields])
-
-class Stats(object):
- def __init__(self, providers, fields=None):
- self.providers = providers
- self._fields_filter = fields
- self.values = {}
- self.update_provider_filters()
-
- def update_provider_filters(self):
- def wanted(key):
- if not self._fields_filter:
- return True
- return re.match(self._fields_filter, key) is not None
-
- # As we reset the counters when updating the fields we can
- # also clear the cache of old values.
- self.values = {}
- for provider in self.providers:
- provider_fields = [key for key in provider.get_available_fields()
- if wanted(key)]
- provider.fields = provider_fields
-
- @property
- def fields_filter(self):
- return self._fields_filter
-
- @fields_filter.setter
- def fields_filter(self, fields_filter):
- self._fields_filter = fields_filter
- self.update_provider_filters()
-
- def get(self):
- for provider in self.providers:
- new = provider.read()
- for key in provider.fields:
- oldval = self.values.get(key, (0, 0))
- newval = new.get(key, 0)
- newdelta = None
- if oldval is not None:
- newdelta = newval - oldval[0]
- self.values[key] = (newval, newdelta)
- return self.values
-
-LABEL_WIDTH = 40
-NUMBER_WIDTH = 10
-
-class Tui(object):
- def __init__(self, stats):
- self.stats = stats
- self.screen = None
- self.drilldown = False
- self.update_drilldown()
-
- def __enter__(self):
- """Initialises curses for later use. Based on curses.wrapper
- implementation from the Python standard library."""
- self.screen = curses.initscr()
- curses.noecho()
- curses.cbreak()
-
- # The try/catch works around a minor bit of
- # over-conscientiousness in the curses module, the error
- # return from C start_color() is ignorable.
- try:
- curses.start_color()
- except:
- pass
-
- curses.use_default_colors()
- return self
-
- def __exit__(self, *exception):
- """Resets the terminal to its normal state. Based on curses.wrappre
- implementation from the Python standard library."""
- if self.screen:
- self.screen.keypad(0)
- curses.echo()
- curses.nocbreak()
- curses.endwin()
-
- def update_drilldown(self):
- if not self.stats.fields_filter:
- self.stats.fields_filter = r'^[^\(]*$'
-
- elif self.stats.fields_filter == r'^[^\(]*$':
- self.stats.fields_filter = None
-
- def refresh(self, sleeptime):
- self.screen.erase()
- self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
- self.screen.addstr(2, 1, 'Event')
- self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
- len('Total'), 'Total')
- self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 -
- len('Current'), 'Current')
- row = 3
- stats = self.stats.get()
- def sortkey(x):
- if stats[x][1]:
- return (-stats[x][1], -stats[x][0])
- else:
- return (0, -stats[x][0])
- for key in sorted(stats.keys(), key=sortkey):
-
- if row >= self.screen.getmaxyx()[0]:
- break
- values = stats[key]
- if not values[0] and not values[1]:
- break
- col = 1
- self.screen.addstr(row, col, key)
- col += LABEL_WIDTH
- self.screen.addstr(row, col, '%10d' % (values[0],))
- col += NUMBER_WIDTH
- if values[1] is not None:
- self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
- row += 1
- self.screen.refresh()
-
- def show_filter_selection(self):
- while True:
- self.screen.erase()
- self.screen.addstr(0, 0,
- "Show statistics for events matching a regex.",
- curses.A_BOLD)
- self.screen.addstr(2, 0,
- "Current regex: {0}"
- .format(self.stats.fields_filter))
- self.screen.addstr(3, 0, "New regex: ")
- curses.echo()
- regex = self.screen.getstr()
- curses.noecho()
- if len(regex) == 0:
- return
- try:
- re.compile(regex)
- self.stats.fields_filter = regex
- return
- except re.error:
- continue
-
- def show_stats(self):
- sleeptime = 0.25
- while True:
- self.refresh(sleeptime)
- curses.halfdelay(int(sleeptime * 10))
- sleeptime = 3
- try:
- char = self.screen.getkey()
- if char == 'x':
- self.drilldown = not self.drilldown
- self.update_drilldown()
- if char == 'q':
- break
- if char == 'f':
- self.show_filter_selection()
- except KeyboardInterrupt:
- break
- except curses.error:
- continue
-
-def batch(stats):
- s = stats.get()
- time.sleep(1)
- s = stats.get()
- for key in sorted(s.keys()):
- values = s[key]
- print '%-42s%10d%10d' % (key, values[0], values[1])
-
-def log(stats):
- keys = sorted(stats.get().iterkeys())
- def banner():
- for k in keys:
- print '%s' % k,
- print
- def statline():
- s = stats.get()
- for k in keys:
- print ' %9d' % s[k][1],
- print
- line = 0
- banner_repeat = 20
- while True:
- time.sleep(1)
- if line % banner_repeat == 0:
- banner()
- statline()
- line += 1
-
-def get_options():
- description_text = """
-This script displays various statistics about VMs running under KVM.
-The statistics are gathered from the KVM debugfs entries and / or the
-currently available perf traces.
-
-The monitoring takes additional cpu cycles and might affect the VM's
-performance.
-
-Requirements:
-- Access to:
- /sys/kernel/debug/kvm
- /sys/kernel/debug/trace/events/*
- /proc/pid/task
-- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
- CAP_SYS_ADMIN and perf events are used.
-- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
- the large number of files that are possibly opened.
-"""
-
- class PlainHelpFormatter(optparse.IndentedHelpFormatter):
- def format_description(self, description):
- if description:
- return description + "\n"
- else:
- return ""
-
- optparser = optparse.OptionParser(description=description_text,
- formatter=PlainHelpFormatter())
- optparser.add_option('-1', '--once', '--batch',
- action='store_true',
- default=False,
- dest='once',
- help='run in batch mode for one second',
- )
- optparser.add_option('-l', '--log',
- action='store_true',
- default=False,
- dest='log',
- help='run in logging mode (like vmstat)',
- )
- optparser.add_option('-t', '--tracepoints',
- action='store_true',
- default=False,
- dest='tracepoints',
- help='retrieve statistics from tracepoints',
- )
- optparser.add_option('-d', '--debugfs',
- action='store_true',
- default=False,
- dest='debugfs',
- help='retrieve statistics from debugfs',
- )
- optparser.add_option('-f', '--fields',
- action='store',
- default=None,
- dest='fields',
- help='fields to display (regex)',
- )
- (options, _) = optparser.parse_args(sys.argv)
- return options
-
-def get_providers(options):
- providers = []
-
- if options.tracepoints:
- providers.append(TracepointProvider())
- if options.debugfs:
- providers.append(DebugfsProvider())
- if len(providers) == 0:
- providers.append(TracepointProvider())
-
- return providers
-
-def check_access(options):
- if not os.path.exists('/sys/kernel/debug'):
- sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
- sys.exit(1)
-
- if not os.path.exists(PATH_DEBUGFS_KVM):
- sys.stderr.write("Please make sure, that debugfs is mounted and "
- "readable by the current user:\n"
- "('mount -t debugfs debugfs /sys/kernel/debug')\n"
- "Also ensure, that the kvm modules are loaded.\n")
- sys.exit(1)
-
- if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints
- or not options.debugfs):
- sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
- "when using the option -t (default).\n"
- "If it is enabled, make {0} readable by the "
- "current user.\n"
- .format(PATH_DEBUGFS_TRACING))
- if options.tracepoints:
- sys.exit(1)
-
- sys.stderr.write("Falling back to debugfs statistics!\n")
- options.debugfs = True
- sleep(5)
-
- return options
-
-def main():
- options = get_options()
- options = check_access(options)
- providers = get_providers(options)
- stats = Stats(providers, fields=options.fields)
-
- if options.log:
- log(stats)
- elif not options.once:
- with Tui(stats) as tui:
- tui.show_stats()
- else:
- batch(stats)
-
-if __name__ == "__main__":
- main()
diff --git a/scripts/kvm/kvm_stat.texi b/scripts/kvm/kvm_stat.texi
deleted file mode 100644
index 6ce00d80e7..0000000000
--- a/scripts/kvm/kvm_stat.texi
+++ /dev/null
@@ -1,55 +0,0 @@
-@example
-@c man begin SYNOPSIS
-usage: kvm_stat [OPTION]...
-@c man end
-@end example
-
-@c man begin DESCRIPTION
-
-kvm_stat prints counts of KVM kernel module trace events. These events signify
-state transitions such as guest mode entry and exit.
-
-This tool is useful for observing guest behavior from the host perspective.
-Often conclusions about performance or buggy behavior can be drawn from the
-output.
-
-The set of KVM kernel module trace events may be specific to the kernel version
-or architecture. It is best to check the KVM kernel module source code for the
-meaning of events.
-
-Note that trace events are counted globally across all running guests.
-
-@c man end
-
-@c man begin OPTIONS
-@table @option
-@item -1, --once, --batch
- run in batch mode for one second
-@item -l, --log
- run in logging mode (like vmstat)
-@item -t, --tracepoints
- retrieve statistics from tracepoints
-@item -d, --debugfs
- retrieve statistics from debugfs
-@item -f, --fields=@var{fields}
- fields to display (regex)
-@item -h, --help
- show help message
-@end table
-
-@c man end
-
-@ignore
-
-@setfilename kvm_stat
-@settitle Report KVM kernel module event counters.
-
-@c man begin AUTHOR
-Stefan Hajnoczi <stefanha@redhat.com>
-@c man end
-
-@c man begin SEEALSO
-perf(1), trace-cmd(1)
-@c man end
-
-@end ignore
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 7b3667a089..abf50e6632 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -411,7 +411,8 @@ int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
if ((env->mcg_cap & MCG_SER_P) && addr
&& (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) {
- if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
+ ram_addr = qemu_ram_addr_from_host(addr);
+ if (ram_addr == RAM_ADDR_INVALID ||
!kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
fprintf(stderr, "Hardware memory error for memory used by "
"QEMU itself instead of guest system!\n");
@@ -445,7 +446,8 @@ int kvm_arch_on_sigbus(int code, void *addr)
hwaddr paddr;
/* Hope we are lucky for AO MCE */
- if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
+ ram_addr = qemu_ram_addr_from_host(addr);
+ if (ram_addr == RAM_ADDR_INVALID ||
!kvm_physical_memory_addr_from_host(first_cpu->kvm_state,
addr, &paddr)) {
fprintf(stderr, "Hardware memory error for memory used by "
diff --git a/xen-hvm.c b/xen-hvm.c
index c14e778a8e..01ee25de21 100644
--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -511,8 +511,13 @@ static void xen_io_add(MemoryListener *listener,
MemoryRegionSection *section)
{
XenIOState *state = container_of(listener, XenIOState, io_listener);
+ MemoryRegion *mr = section->mr;
- memory_region_ref(section->mr);
+ if (mr->ops == &unassigned_io_ops) {
+ return;
+ }
+
+ memory_region_ref(mr);
xen_map_io_section(xen_xc, xen_domid, state->ioservid, section);
}
@@ -521,10 +526,15 @@ static void xen_io_del(MemoryListener *listener,
MemoryRegionSection *section)
{
XenIOState *state = container_of(listener, XenIOState, io_listener);
+ MemoryRegion *mr = section->mr;
+
+ if (mr->ops == &unassigned_io_ops) {
+ return;
+ }
xen_unmap_io_section(xen_xc, xen_domid, state->ioservid, section);
- memory_region_unref(section->mr);
+ memory_region_unref(mr);
}
static void xen_device_realize(DeviceListener *listener,