diff options
Diffstat (limited to 'hw/virtio/virtio.c')
-rw-r--r-- | hw/virtio/virtio.c | 348 |
1 files changed, 240 insertions, 108 deletions
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 63a7b6d7ba..90f25451d0 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -70,7 +70,15 @@ typedef struct VRing struct VirtQueue { VRing vring; + + /* Next head to pop */ uint16_t last_avail_idx; + + /* Last avail_idx read from VQ. */ + uint16_t shadow_avail_idx; + + uint16_t used_idx; + /* Last used index value we have signalled on */ uint16_t signalled_used; @@ -107,35 +115,15 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n) vring->align); } -static inline uint64_t vring_desc_addr(VirtIODevice *vdev, hwaddr desc_pa, - int i) +static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, + hwaddr desc_pa, int i) { - hwaddr pa; - pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); - return virtio_ldq_phys(vdev, pa); -} - -static inline uint32_t vring_desc_len(VirtIODevice *vdev, hwaddr desc_pa, int i) -{ - hwaddr pa; - pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); - return virtio_ldl_phys(vdev, pa); -} - -static inline uint16_t vring_desc_flags(VirtIODevice *vdev, hwaddr desc_pa, - int i) -{ - hwaddr pa; - pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags); - return virtio_lduw_phys(vdev, pa); -} - -static inline uint16_t vring_desc_next(VirtIODevice *vdev, hwaddr desc_pa, - int i) -{ - hwaddr pa; - pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); - return virtio_lduw_phys(vdev, pa); + address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc), + MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc)); + virtio_tswap64s(vdev, &desc->addr); + virtio_tswap32s(vdev, &desc->len); + virtio_tswap16s(vdev, &desc->flags); + virtio_tswap16s(vdev, &desc->next); } static inline uint16_t vring_avail_flags(VirtQueue *vq) @@ -149,7 +137,8 @@ static inline uint16_t vring_avail_idx(VirtQueue *vq) { hwaddr pa; pa = vq->vring.avail + offsetof(VRingAvail, idx); - return virtio_lduw_phys(vq->vdev, pa); + vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa); + return vq->shadow_avail_idx; } static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) @@ -164,18 +153,15 @@ static inline uint16_t vring_get_used_event(VirtQueue *vq) return vring_avail_ring(vq, vq->vring.num); } -static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val) -{ - hwaddr pa; - pa = vq->vring.used + offsetof(VRingUsed, ring[i].id); - virtio_stl_phys(vq->vdev, pa, val); -} - -static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val) +static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, + int i) { hwaddr pa; - pa = vq->vring.used + offsetof(VRingUsed, ring[i].len); - virtio_stl_phys(vq->vdev, pa, val); + virtio_tswap32s(vq->vdev, &uelem->id); + virtio_tswap32s(vq->vdev, &uelem->len); + pa = vq->vring.used + offsetof(VRingUsed, ring[i]); + address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED, + (void *)uelem, sizeof(VRingUsedElem)); } static uint16_t vring_used_idx(VirtQueue *vq) @@ -190,6 +176,7 @@ static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) hwaddr pa; pa = vq->vring.used + offsetof(VRingUsed, idx); virtio_stw_phys(vq->vdev, pa, val); + vq->used_idx = val; } static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) @@ -239,8 +226,14 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +/* Fetch avail_idx from VQ memory only when we really need to know if + * guest has added some buffers. */ int virtio_queue_empty(VirtQueue *vq) { + if (vq->shadow_avail_idx != vq->last_avail_idx) { + return 0; + } + return vring_avail_idx(vq) == vq->last_avail_idx; } @@ -277,15 +270,17 @@ void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { + VRingUsedElem uelem; + trace_virtqueue_fill(vq, elem, len, idx); virtqueue_unmap_sg(vq, elem, len); - idx = (idx + vring_used_idx(vq)) % vq->vring.num; + idx = (idx + vq->used_idx) % vq->vring.num; - /* Get a pointer to the next entry in the used ring. */ - vring_used_ring_id(vq, idx, elem->index); - vring_used_ring_len(vq, idx, len); + uelem.id = elem->index; + uelem.len = len; + vring_used_write(vq, &uelem, idx); } void virtqueue_flush(VirtQueue *vq, unsigned int count) @@ -294,7 +289,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) /* Make sure buffer is written before we update index. */ smp_wmb(); trace_virtqueue_flush(vq, count); - old = vring_used_idx(vq); + old = vq->used_idx; new = old + count; vring_used_idx_set(vq, new); vq->inuse -= count; @@ -316,7 +311,7 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) /* Check it isn't doing very strange things with descriptor numbers. */ if (num_heads > vq->vring.num) { error_report("Guest moved used index from %u to %u", - idx, vring_avail_idx(vq)); + idx, vq->shadow_avail_idx); exit(1); } /* On success, callers read a descriptor at vq->last_avail_idx. @@ -345,18 +340,18 @@ static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) return head; } -static unsigned virtqueue_next_desc(VirtIODevice *vdev, hwaddr desc_pa, - unsigned int i, unsigned int max) +static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, + hwaddr desc_pa, unsigned int max) { unsigned int next; /* If this descriptor says it doesn't chain, we're done. */ - if (!(vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_NEXT)) { + if (!(desc->flags & VRING_DESC_F_NEXT)) { return max; } /* Check they're not leading us off end of descriptors. */ - next = vring_desc_next(vdev, desc_pa, i); + next = desc->next; /* Make sure compiler knows to grab that: we don't want it changing! */ smp_wmb(); @@ -365,6 +360,7 @@ static unsigned virtqueue_next_desc(VirtIODevice *vdev, hwaddr desc_pa, exit(1); } + vring_desc_read(vdev, desc, desc_pa, next); return next; } @@ -381,6 +377,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, while (virtqueue_num_heads(vq, idx)) { VirtIODevice *vdev = vq->vdev; unsigned int max, num_bufs, indirect = 0; + VRingDesc desc; hwaddr desc_pa; int i; @@ -388,9 +385,10 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, num_bufs = total_bufs; i = virtqueue_get_head(vq, idx++); desc_pa = vq->vring.desc; + vring_desc_read(vdev, &desc, desc_pa, i); - if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) { - if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) { + if (desc.flags & VRING_DESC_F_INDIRECT) { + if (desc.len % sizeof(VRingDesc)) { error_report("Invalid size for indirect buffer table"); exit(1); } @@ -403,9 +401,10 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, /* loop over the indirect descriptor table */ indirect = 1; - max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc); - desc_pa = vring_desc_addr(vdev, desc_pa, i); + max = desc.len / sizeof(VRingDesc); + desc_pa = desc.addr; num_bufs = i = 0; + vring_desc_read(vdev, &desc, desc_pa, i); } do { @@ -415,15 +414,15 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, exit(1); } - if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) { - in_total += vring_desc_len(vdev, desc_pa, i); + if (desc.flags & VRING_DESC_F_WRITE) { + in_total += desc.len; } else { - out_total += vring_desc_len(vdev, desc_pa, i); + out_total += desc.len; } if (in_total >= max_in_bytes && out_total >= max_out_bytes) { goto done; } - } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max); + } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max); if (!indirect) total_bufs = num_bufs; @@ -448,6 +447,32 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, return in_bytes <= in_total && out_bytes <= out_total; } +static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov, + unsigned int max_num_sg, bool is_write, + hwaddr pa, size_t sz) +{ + unsigned num_sg = *p_num_sg; + assert(num_sg <= max_num_sg); + + while (sz) { + hwaddr len = sz; + + if (num_sg == max_num_sg) { + error_report("virtio: too many write descriptors in indirect table"); + exit(1); + } + + iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write); + iov[num_sg].iov_len = len; + addr[num_sg] = pa; + + sz -= len; + pa += len; + num_sg++; + } + *p_num_sg = num_sg; +} + static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr, unsigned int *num_sg, unsigned int max_size, int is_write) @@ -474,44 +499,62 @@ static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr, error_report("virtio: error trying to map MMIO memory"); exit(1); } - if (len == sg[i].iov_len) { - continue; - } - if (*num_sg >= max_size) { - error_report("virtio: memory split makes iovec too large"); + if (len != sg[i].iov_len) { + error_report("virtio: unexpected memory split"); exit(1); } - memmove(sg + i + 1, sg + i, sizeof(*sg) * (*num_sg - i)); - memmove(addr + i + 1, addr + i, sizeof(*addr) * (*num_sg - i)); - assert(len < sg[i + 1].iov_len); - sg[i].iov_len = len; - addr[i + 1] += len; - sg[i + 1].iov_len -= len; - ++*num_sg; } } void virtqueue_map(VirtQueueElement *elem) { virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num, - MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)), - 1); + VIRTQUEUE_MAX_SIZE, 1); virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num, - MIN(ARRAY_SIZE(elem->out_sg), ARRAY_SIZE(elem->out_addr)), - 0); + VIRTQUEUE_MAX_SIZE, 0); } -int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) +void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num) +{ + VirtQueueElement *elem; + size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0])); + size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]); + size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]); + size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0])); + size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]); + size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]); + + assert(sz >= sizeof(VirtQueueElement)); + elem = g_malloc(out_sg_end); + elem->out_num = out_num; + elem->in_num = in_num; + elem->in_addr = (void *)elem + in_addr_ofs; + elem->out_addr = (void *)elem + out_addr_ofs; + elem->in_sg = (void *)elem + in_sg_ofs; + elem->out_sg = (void *)elem + out_sg_ofs; + return elem; +} + +void *virtqueue_pop(VirtQueue *vq, size_t sz) { unsigned int i, head, max; hwaddr desc_pa = vq->vring.desc; VirtIODevice *vdev = vq->vdev; + VirtQueueElement *elem; + unsigned out_num, in_num; + hwaddr addr[VIRTQUEUE_MAX_SIZE]; + struct iovec iov[VIRTQUEUE_MAX_SIZE]; + VRingDesc desc; - if (!virtqueue_num_heads(vq, vq->last_avail_idx)) - return 0; + if (virtio_queue_empty(vq)) { + return NULL; + } + /* Needed after virtio_queue_empty(), see comment in + * virtqueue_num_heads(). */ + smp_rmb(); /* When we start there are none of either input nor output. */ - elem->out_num = elem->in_num = 0; + out_num = in_num = 0; max = vq->vring.num; @@ -520,56 +563,140 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) vring_set_avail_event(vq, vq->last_avail_idx); } - if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) { - if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) { + vring_desc_read(vdev, &desc, desc_pa, i); + if (desc.flags & VRING_DESC_F_INDIRECT) { + if (desc.len % sizeof(VRingDesc)) { error_report("Invalid size for indirect buffer table"); exit(1); } /* loop over the indirect descriptor table */ - max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc); - desc_pa = vring_desc_addr(vdev, desc_pa, i); + max = desc.len / sizeof(VRingDesc); + desc_pa = desc.addr; i = 0; + vring_desc_read(vdev, &desc, desc_pa, i); } /* Collect all the descriptors */ do { - struct iovec *sg; - - if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) { - if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) { - error_report("Too many write descriptors in indirect table"); - exit(1); - } - elem->in_addr[elem->in_num] = vring_desc_addr(vdev, desc_pa, i); - sg = &elem->in_sg[elem->in_num++]; + if (desc.flags & VRING_DESC_F_WRITE) { + virtqueue_map_desc(&in_num, addr + out_num, iov + out_num, + VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len); } else { - if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) { - error_report("Too many read descriptors in indirect table"); + if (in_num) { + error_report("Incorrect order for descriptors"); exit(1); } - elem->out_addr[elem->out_num] = vring_desc_addr(vdev, desc_pa, i); - sg = &elem->out_sg[elem->out_num++]; + virtqueue_map_desc(&out_num, addr, iov, + VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len); } - sg->iov_len = vring_desc_len(vdev, desc_pa, i); - /* If we've got too many, that implies a descriptor loop. */ - if ((elem->in_num + elem->out_num) > max) { + if ((in_num + out_num) > max) { error_report("Looped descriptor"); exit(1); } - } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max); - - /* Now map what we have collected */ - virtqueue_map(elem); + } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max); + /* Now copy what we have collected and mapped */ + elem = virtqueue_alloc_element(sz, out_num, in_num); elem->index = head; + for (i = 0; i < out_num; i++) { + elem->out_addr[i] = addr[i]; + elem->out_sg[i] = iov[i]; + } + for (i = 0; i < in_num; i++) { + elem->in_addr[i] = addr[out_num + i]; + elem->in_sg[i] = iov[out_num + i]; + } vq->inuse++; trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); - return elem->in_num + elem->out_num; + return elem; +} + +/* Reading and writing a structure directly to QEMUFile is *awful*, but + * it is what QEMU has always done by mistake. We can change it sooner + * or later by bumping the version number of the affected vm states. + * In the meanwhile, since the in-memory layout of VirtQueueElement + * has changed, we need to marshal to and from the layout that was + * used before the change. + */ +typedef struct VirtQueueElementOld { + unsigned int index; + unsigned int out_num; + unsigned int in_num; + hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; + hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; + struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; + struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; +} VirtQueueElementOld; + +void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz) +{ + VirtQueueElement *elem; + VirtQueueElementOld data; + int i; + + qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); + + elem = virtqueue_alloc_element(sz, data.out_num, data.in_num); + elem->index = data.index; + + for (i = 0; i < elem->in_num; i++) { + elem->in_addr[i] = data.in_addr[i]; + } + + for (i = 0; i < elem->out_num; i++) { + elem->out_addr[i] = data.out_addr[i]; + } + + for (i = 0; i < elem->in_num; i++) { + /* Base is overwritten by virtqueue_map. */ + elem->in_sg[i].iov_base = 0; + elem->in_sg[i].iov_len = data.in_sg[i].iov_len; + } + + for (i = 0; i < elem->out_num; i++) { + /* Base is overwritten by virtqueue_map. */ + elem->out_sg[i].iov_base = 0; + elem->out_sg[i].iov_len = data.out_sg[i].iov_len; + } + + virtqueue_map(elem); + return elem; +} + +void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem) +{ + VirtQueueElementOld data; + int i; + + memset(&data, 0, sizeof(data)); + data.index = elem->index; + data.in_num = elem->in_num; + data.out_num = elem->out_num; + + for (i = 0; i < elem->in_num; i++) { + data.in_addr[i] = elem->in_addr[i]; + } + + for (i = 0; i < elem->out_num; i++) { + data.out_addr[i] = elem->out_addr[i]; + } + + for (i = 0; i < elem->in_num; i++) { + /* Base is overwritten by virtqueue_map when loading. Do not + * save it, as it would leak the QEMU address space layout. */ + data.in_sg[i].iov_len = elem->in_sg[i].iov_len; + } + + for (i = 0; i < elem->out_num; i++) { + /* Do not save iov_base as above. */ + data.out_sg[i].iov_len = elem->out_sg[i].iov_len; + } + qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); } /* virtio device */ @@ -673,6 +800,8 @@ void virtio_reset(void *opaque) vdev->vq[i].vring.avail = 0; vdev->vq[i].vring.used = 0; vdev->vq[i].last_avail_idx = 0; + vdev->vq[i].shadow_avail_idx = 0; + vdev->vq[i].used_idx = 0; virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); vdev->vq[i].signalled_used = 0; vdev->vq[i].signalled_used_valid = false; @@ -1041,7 +1170,7 @@ static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq) smp_mb(); /* Always notify when queue is empty (when feature acknowledge) */ if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) && - !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx) { + !vq->inuse && virtio_queue_empty(vq)) { return true; } @@ -1052,7 +1181,7 @@ static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq) v = vq->signalled_used_valid; vq->signalled_used_valid = true; old = vq->signalled_used; - new = vq->signalled_used = vring_used_idx(vq); + new = vq->signalled_used = vq->used_idx; return !v || vring_need_event(vring_get_used_event(vq), new, old); } @@ -1143,8 +1272,8 @@ static const VMStateDescription vmstate_virtio_virtqueues = { .minimum_version_id = 1, .needed = &virtio_virtqueue_needed, .fields = (VMStateField[]) { - VMSTATE_STRUCT_VARRAY_KNOWN(vq, struct VirtIODevice, VIRTIO_QUEUE_MAX, - 0, vmstate_virtqueue, VirtQueue), + VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, + VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue), VMSTATE_END_OF_LIST() } }; @@ -1165,8 +1294,8 @@ static const VMStateDescription vmstate_virtio_ringsize = { .minimum_version_id = 1, .needed = &virtio_ringsize_needed, .fields = (VMStateField[]) { - VMSTATE_STRUCT_VARRAY_KNOWN(vq, struct VirtIODevice, VIRTIO_QUEUE_MAX, - 0, vmstate_ringsize, VirtQueue), + VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, + VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue), VMSTATE_END_OF_LIST() } }; @@ -1464,6 +1593,8 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) vdev->vq[i].last_avail_idx, nheads); return -1; } + vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]); + vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]); } } @@ -1599,6 +1730,7 @@ uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) { vdev->vq[n].last_avail_idx = idx; + vdev->vq[n].shadow_avail_idx = idx; } void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n) |