aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hw/acpi/memory_hotplug.c8
-rw-r--r--hw/block/virtio-blk.c5
-rw-r--r--hw/char/virtio-serial-bus.c5
-rw-r--r--hw/i386/pc.c5
-rw-r--r--hw/i386/pc_piix.c1
-rw-r--r--hw/i386/pc_q35.c1
-rw-r--r--hw/mem/pc-dimm.c15
-rw-r--r--hw/pci/pci.c40
-rw-r--r--hw/pci/pci_host.c15
-rw-r--r--hw/pci/pcie.c39
-rw-r--r--hw/ppc/spapr.c2
-rw-r--r--hw/scsi/virtio-scsi.c16
-rw-r--r--hw/virtio/vhost-user.c22
-rw-r--r--hw/virtio/virtio.c52
-rw-r--r--include/hw/i386/pc.h1
-rw-r--r--include/hw/mem/pc-dimm.h7
-rw-r--r--include/hw/pci/pci.h1
-rw-r--r--include/hw/virtio/virtio.h3
-rw-r--r--tests/Makefile1
-rw-r--r--tests/vhost-user-bridge.c1110
-rw-r--r--tests/vhost-user-test.c18
-rw-r--r--util/mmap-alloc.c4
22 files changed, 1276 insertions, 95 deletions
diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c
index 2ff0d5ce1b..ce428dfc18 100644
--- a/hw/acpi/memory_hotplug.c
+++ b/hw/acpi/memory_hotplug.c
@@ -238,10 +238,12 @@ void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st,
mdev->dimm = dev;
mdev->is_enabled = true;
- mdev->is_inserting = true;
+ if (dev->hotplugged) {
+ mdev->is_inserting = true;
- /* do ACPI magic */
- acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS);
+ /* do ACPI magic */
+ acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS);
+ }
return;
}
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 8beb26b4db..3e230debb8 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -839,10 +839,7 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
req->next = s->rq;
s->rq = req;
- virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
- req->elem.in_num, 1);
- virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
- req->elem.out_num, 0);
+ virtqueue_map(&req->elem);
}
return 0;
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index be97058712..497b0afd9f 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -705,10 +705,7 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,
qemu_get_buffer(f, (unsigned char *)&port->elem,
sizeof(port->elem));
- virtqueue_map_sg(port->elem.in_sg, port->elem.in_addr,
- port->elem.in_num, 1);
- virtqueue_map_sg(port->elem.out_sg, port->elem.out_addr,
- port->elem.out_num, 1);
+ virtqueue_map(&port->elem);
/*
* Port was throttled on source machine. Let's
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 3d958bae5b..0cb8afd2c2 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1616,7 +1616,6 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
HotplugHandlerClass *hhc;
Error *local_err = NULL;
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
- PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
PCDIMMDevice *dimm = PC_DIMM(dev);
PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
MemoryRegion *mr = ddc->get_memory_region(dimm);
@@ -1632,8 +1631,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
goto out;
}
- pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
- pcmc->inter_dimm_gap, &local_err);
+ pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
if (local_err) {
goto out;
}
@@ -1953,7 +1951,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
- pcmc->inter_dimm_gap = true;
pcmc->get_hotplug_handler = mc->get_hotplug_handler;
mc->get_hotplug_handler = pc_get_hotpug_handler;
mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 9d4425a5b9..393dcc4544 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -487,7 +487,6 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m)
m->alias = NULL;
m->is_default = 0;
pcmc->broken_reserved_end = true;
- pcmc->inter_dimm_gap = false;
SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
}
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 3744abd397..2f8f3963c4 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -385,7 +385,6 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
pc_q35_2_5_machine_options(m);
m->alias = NULL;
pcmc->broken_reserved_end = true;
- pcmc->inter_dimm_gap = false;
SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
}
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 2bae994667..80f424b442 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -33,8 +33,7 @@ typedef struct pc_dimms_capacity {
} pc_dimms_capacity;
void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
- MemoryRegion *mr, uint64_t align, bool gap,
- Error **errp)
+ MemoryRegion *mr, uint64_t align, Error **errp)
{
int slot;
MachineState *machine = MACHINE(qdev_get_machine());
@@ -50,7 +49,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
addr = pc_dimm_get_free_addr(hpms->base,
memory_region_size(&hpms->mr),
- !addr ? NULL : &addr, align, gap,
+ !addr ? NULL : &addr, align,
memory_region_size(mr), &local_err);
if (local_err) {
goto out;
@@ -295,8 +294,8 @@ static int pc_dimm_built_list(Object *obj, void *opaque)
uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
uint64_t address_space_size,
- uint64_t *hint, uint64_t align, bool gap,
- uint64_t size, Error **errp)
+ uint64_t *hint, uint64_t align, uint64_t size,
+ Error **errp)
{
GSList *list = NULL, *item;
uint64_t new_addr, ret = 0;
@@ -341,15 +340,13 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
goto out;
}
- if (ranges_overlap(dimm->addr, dimm_size, new_addr,
- size + (gap ? 1 : 0))) {
+ if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) {
if (hint) {
DeviceState *d = DEVICE(dimm);
error_setg(errp, "address range conflicts with '%s'", d->id);
goto out;
}
- new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size + (gap ? 1 : 0),
- align);
+ new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size, align);
}
}
ret = new_addr;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index b0bf54061f..168b9cc56b 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -847,6 +847,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
PCIConfigWriteFunc *config_write = pc->config_write;
Error *local_err = NULL;
AddressSpace *dma_as;
+ DeviceState *dev = DEVICE(pci_dev);
+
+ pci_dev->bus = bus;
if (devfn < 0) {
for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
@@ -864,9 +867,17 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
PCI_SLOT(devfn), PCI_FUNC(devfn), name,
bus->devices[devfn]->name);
return NULL;
+ } else if (dev->hotplugged &&
+ pci_get_function_0(pci_dev)) {
+ error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
+ " new func %s cannot be exposed to guest.",
+ PCI_SLOT(devfn),
+ bus->devices[PCI_DEVFN(PCI_SLOT(devfn), 0)]->name,
+ name);
+
+ return NULL;
}
- pci_dev->bus = bus;
pci_dev->devfn = devfn;
dma_as = pci_device_iommu_address_space(pci_dev);
@@ -2454,6 +2465,33 @@ void pci_bus_get_w64_range(PCIBus *bus, Range *range)
pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
}
+static bool pcie_has_upstream_port(PCIDevice *dev)
+{
+ PCIDevice *parent_dev = pci_bridge_get_device(dev->bus);
+
+ /* Device associated with an upstream port.
+ * As there are several types of these, it's easier to check the
+ * parent device: upstream ports are always connected to
+ * root or downstream ports.
+ */
+ return parent_dev &&
+ pci_is_express(parent_dev) &&
+ parent_dev->exp.exp_cap &&
+ (pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_ROOT_PORT ||
+ pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_DOWNSTREAM);
+}
+
+PCIDevice *pci_get_function_0(PCIDevice *pci_dev)
+{
+ if(pcie_has_upstream_port(pci_dev)) {
+ /* With an upstream PCIe port, we only support 1 device at slot 0 */
+ return pci_dev->bus->devices[0];
+ } else {
+ /* Other bus types might support multiple devices at slots 0-31 */
+ return pci_dev->bus->devices[PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 0)];
+ }
+}
+
static const TypeInfo pci_device_type_info = {
.name = TYPE_PCI_DEVICE,
.parent = TYPE_DEVICE,
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index 3e26f9256c..49f59a5dbc 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -20,6 +20,7 @@
#include "hw/pci/pci.h"
#include "hw/pci/pci_host.h"
+#include "hw/pci/pci_bus.h"
#include "trace.h"
/* debug PCI */
@@ -52,6 +53,13 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
uint32_t limit, uint32_t val, uint32_t len)
{
assert(len <= 4);
+ /* non-zero functions are only exposed when function 0 is present,
+ * allowing direct removal of unexposed functions.
+ */
+ if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+ return;
+ }
+
trace_pci_cfg_write(pci_dev->name, PCI_SLOT(pci_dev->devfn),
PCI_FUNC(pci_dev->devfn), addr, val);
pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr));
@@ -63,6 +71,13 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
uint32_t ret;
assert(len <= 4);
+ /* non-zero functions are only exposed when function 0 is present,
+ * allowing direct removal of unexposed functions.
+ */
+ if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+ return ~0x0;
+ }
+
ret = pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr));
trace_pci_cfg_read(pci_dev->name, PCI_SLOT(pci_dev->devfn),
PCI_FUNC(pci_dev->devfn), addr, ret);
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6e28985bd1..32c65c27a4 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -249,25 +249,43 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
return;
}
- /* TODO: multifunction hot-plug.
- * Right now, only a device of function = 0 is allowed to be
- * hot plugged/unplugged.
+ /* To enable multifunction hot-plug, we just ensure the function
+ * 0 added last. When function 0 is added, we set the sltsta and
+ * inform OS via event notification.
*/
- assert(PCI_FUNC(pci_dev->devfn) == 0);
+ if (pci_get_function_0(pci_dev)) {
+ pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
+ PCI_EXP_SLTSTA_PDS);
+ pcie_cap_slot_event(PCI_DEVICE(hotplug_dev),
+ PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
+ }
+}
- pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
- PCI_EXP_SLTSTA_PDS);
- pcie_cap_slot_event(PCI_DEVICE(hotplug_dev),
- PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
+static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
+{
+ object_unparent(OBJECT(dev));
}
void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
uint8_t *exp_cap;
+ PCIDevice *pci_dev = PCI_DEVICE(dev);
+ PCIBus *bus = pci_dev->bus;
pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
+ /* In case user cancel the operation of multi-function hot-add,
+ * remove the function that is unexposed to guest individually,
+ * without interaction with guest.
+ */
+ if (pci_dev->devfn &&
+ !bus->devices[0]) {
+ pcie_unplug_device(bus, pci_dev, NULL);
+
+ return;
+ }
+
pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev));
}
@@ -378,11 +396,6 @@ void pcie_cap_slot_reset(PCIDevice *dev)
hotplug_event_update_event_status(dev);
}
-static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
-{
- object_unparent(OBJECT(dev));
-}
-
void pcie_cap_slot_write_config(PCIDevice *dev,
uint32_t addr, uint32_t val, int len)
{
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e1202cec9f..288b57e24f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2157,7 +2157,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
goto out;
}
- pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, false, &local_err);
+ pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
if (local_err) {
goto out;
}
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 20885fb7f1..46553e1e68 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -205,20 +205,8 @@ static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq)
assert(n < vs->conf.num_queues);
req = virtio_scsi_init_req(s, vs->cmd_vqs[n]);
qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));
- /* TODO: add a way for SCSIBusInfo's load_request to fail,
- * and fail migration instead of asserting here.
- * When we do, we might be able to re-enable NDEBUG below.
- */
-#ifdef NDEBUG
-#error building with NDEBUG is not supported
-#endif
- assert(req->elem.in_num <= ARRAY_SIZE(req->elem.in_sg));
- assert(req->elem.out_num <= ARRAY_SIZE(req->elem.out_sg));
-
- virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
- req->elem.in_num, 1);
- virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
- req->elem.out_num, 0);
+
+ virtqueue_map(&req->elem);
if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size,
sizeof(VirtIOSCSICmdResp) + vs->sense_size) < 0) {
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 0aa8e0d93a..83c84f1cd6 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -201,7 +201,7 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
.request = VHOST_USER_SET_LOG_BASE,
.flags = VHOST_USER_VERSION,
.payload.u64 = base,
- .size = sizeof(m.payload.u64),
+ .size = sizeof(msg.payload.u64),
};
if (shmfd && log->fd != -1) {
@@ -265,8 +265,8 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
return -1;
}
- msg.size = sizeof(m.payload.memory.nregions);
- msg.size += sizeof(m.payload.memory.padding);
+ msg.size = sizeof(msg.payload.memory.nregions);
+ msg.size += sizeof(msg.payload.memory.padding);
msg.size += fd_num * sizeof(VhostUserMemoryRegion);
vhost_user_write(dev, &msg, fds, fd_num);
@@ -281,7 +281,7 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev,
.request = VHOST_USER_SET_VRING_ADDR,
.flags = VHOST_USER_VERSION,
.payload.addr = *addr,
- .size = sizeof(*addr),
+ .size = sizeof(msg.payload.addr),
};
vhost_user_write(dev, &msg, NULL, 0);
@@ -304,7 +304,7 @@ static int vhost_set_vring(struct vhost_dev *dev,
.request = request,
.flags = VHOST_USER_VERSION,
.payload.state = *ring,
- .size = sizeof(*ring),
+ .size = sizeof(msg.payload.state),
};
vhost_user_write(dev, &msg, NULL, 0);
@@ -346,7 +346,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
.request = VHOST_USER_GET_VRING_BASE,
.flags = VHOST_USER_VERSION,
.payload.state = *ring,
- .size = sizeof(*ring),
+ .size = sizeof(msg.payload.state),
};
vhost_user_write(dev, &msg, NULL, 0);
@@ -361,7 +361,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
return -1;
}
- if (msg.size != sizeof(m.payload.state)) {
+ if (msg.size != sizeof(msg.payload.state)) {
error_report("Received bad msg size.");
return -1;
}
@@ -381,7 +381,7 @@ static int vhost_set_vring_file(struct vhost_dev *dev,
.request = request,
.flags = VHOST_USER_VERSION,
.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
- .size = sizeof(m.payload.u64),
+ .size = sizeof(msg.payload.u64),
};
if (ioeventfd_enabled() && file->fd > 0) {
@@ -413,7 +413,7 @@ static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
.request = request,
.flags = VHOST_USER_VERSION,
.payload.u64 = u64,
- .size = sizeof(m.payload.u64),
+ .size = sizeof(msg.payload.u64),
};
vhost_user_write(dev, &msg, NULL, 0);
@@ -456,7 +456,7 @@ static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
return -1;
}
- if (msg.size != sizeof(m.payload.u64)) {
+ if (msg.size != sizeof(msg.payload.u64)) {
error_report("Received bad msg size.");
return -1;
}
@@ -592,7 +592,7 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
msg.request = VHOST_USER_SEND_RARP;
msg.flags = VHOST_USER_VERSION;
memcpy((char *)&msg.payload.u64, mac_addr, 6);
- msg.size = sizeof(m.payload.u64);
+ msg.size = sizeof(msg.payload.u64);
err = vhost_user_write(dev, &msg, NULL, 0);
return err;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index d0bc72e0e4..939f802110 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -448,28 +448,59 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
return in_bytes <= in_total && out_bytes <= out_total;
}
-void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
- size_t num_sg, int is_write)
+static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
+ unsigned int *num_sg, unsigned int max_size,
+ int is_write)
{
unsigned int i;
hwaddr len;
- if (num_sg > VIRTQUEUE_MAX_SIZE) {
- error_report("virtio: map attempt out of bounds: %zd > %d",
- num_sg, VIRTQUEUE_MAX_SIZE);
- exit(1);
- }
+ /* Note: this function MUST validate input, some callers
+ * are passing in num_sg values received over the network.
+ */
+ /* TODO: teach all callers that this can fail, and return failure instead
+ * of asserting here.
+ * When we do, we might be able to re-enable NDEBUG below.
+ */
+#ifdef NDEBUG
+#error building with NDEBUG is not supported
+#endif
+ assert(*num_sg <= max_size);
- for (i = 0; i < num_sg; i++) {
+ for (i = 0; i < *num_sg; i++) {
len = sg[i].iov_len;
sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
- if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
+ if (!sg[i].iov_base) {
error_report("virtio: error trying to map MMIO memory");
exit(1);
}
+ if (len == sg[i].iov_len) {
+ continue;
+ }
+ if (*num_sg >= max_size) {
+ error_report("virtio: memory split makes iovec too large");
+ exit(1);
+ }
+ memmove(sg + i + 1, sg + i, sizeof(*sg) * (*num_sg - i));
+ memmove(addr + i + 1, addr + i, sizeof(*addr) * (*num_sg - i));
+ assert(len < sg[i + 1].iov_len);
+ sg[i].iov_len = len;
+ addr[i + 1] += len;
+ sg[i + 1].iov_len -= len;
+ ++*num_sg;
}
}
+void virtqueue_map(VirtQueueElement *elem)
+{
+ virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
+ MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)),
+ 1);
+ virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
+ MIN(ARRAY_SIZE(elem->out_sg), ARRAY_SIZE(elem->out_addr)),
+ 0);
+}
+
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
unsigned int i, head, max;
@@ -531,8 +562,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
} while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);
/* Now map what we have collected */
- virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
- virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
+ virtqueue_map(elem);
elem->index = head;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 7037de044d..606dbc2854 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -60,7 +60,6 @@ struct PCMachineClass {
/*< public >*/
bool broken_reserved_end;
- bool inter_dimm_gap;
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
DeviceState *dev);
};
diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
index c1ee7b0408..d83bf30ea9 100644
--- a/include/hw/mem/pc-dimm.h
+++ b/include/hw/mem/pc-dimm.h
@@ -83,16 +83,15 @@ typedef struct MemoryHotplugState {
uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
uint64_t address_space_size,
- uint64_t *hint, uint64_t align, bool gap,
- uint64_t size, Error **errp);
+ uint64_t *hint, uint64_t align, uint64_t size,
+ Error **errp);
int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
int qmp_pc_dimm_device_list(Object *obj, void *opaque);
uint64_t pc_existing_dimms_capacity(Error **errp);
void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
- MemoryRegion *mr, uint64_t align, bool gap,
- Error **errp);
+ MemoryRegion *mr, uint64_t align, Error **errp);
void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
MemoryRegion *mr);
#endif
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index f5e7fd818a..379b6e1a45 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -397,6 +397,7 @@ void pci_for_each_bus_depth_first(PCIBus *bus,
void *(*begin)(PCIBus *bus, void *parent_state),
void (*end)(PCIBus *bus, void *state),
void *parent_state);
+PCIDevice *pci_get_function_0(PCIDevice *pci_dev);
/* Use this wrapper when specific scan order is not required. */
static inline
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 9d09115fab..205fadf234 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -151,8 +151,7 @@ void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len, unsigned int idx);
-void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
- size_t num_sg, int is_write);
+void virtqueue_map(VirtQueueElement *elem);
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
unsigned int out_bytes);
diff --git a/tests/Makefile b/tests/Makefile
index 1c57e39c53..0739bfe1bf 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -525,6 +525,7 @@ tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y)
tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y)
tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
+tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o
ifeq ($(CONFIG_POSIX),y)
LIBS += -lutil
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
new file mode 100644
index 0000000000..fa18ad55fb
--- /dev/null
+++ b/tests/vhost-user-bridge.c
@@ -0,0 +1,1110 @@
+/*
+ * Vhost User Bridge
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ * Victor Kaplansky <victork@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+/*
+ * TODO:
+ * - main should get parameters from the command line.
+ * - implement all request handlers.
+ * - test for broken requests and virtqueue.
+ * - implement features defined by Virtio 1.0 spec.
+ * - support mergeable buffers and indirect descriptors.
+ * - implement RESET_DEVICE request.
+ * - implement clean shutdown.
+ * - implement non-blocking writes to UDP backend.
+ * - implement polling strategy.
+ */
+
+#include <stddef.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/unistd.h>
+#include <sys/mman.h>
+#include <sys/eventfd.h>
+#include <arpa/inet.h>
+
+#include <linux/vhost.h>
+
+#include "qemu/atomic.h"
+#include "standard-headers/linux/virtio_net.h"
+#include "standard-headers/linux/virtio_ring.h"
+
+#define VHOST_USER_BRIDGE_DEBUG 1
+
+#define DPRINT(...) \
+ do { \
+ if (VHOST_USER_BRIDGE_DEBUG) { \
+ printf(__VA_ARGS__); \
+ } \
+ } while (0)
+
+typedef void (*CallbackFunc)(int sock, void *ctx);
+
+typedef struct Event {
+ void *ctx;
+ CallbackFunc callback;
+} Event;
+
+typedef struct Dispatcher {
+ int max_sock;
+ fd_set fdset;
+ Event events[FD_SETSIZE];
+} Dispatcher;
+
+static void
+vubr_die(const char *s)
+{
+ perror(s);
+ exit(1);
+}
+
+static int
+dispatcher_init(Dispatcher *dispr)
+{
+ FD_ZERO(&dispr->fdset);
+ dispr->max_sock = -1;
+ return 0;
+}
+
+static int
+dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
+{
+ if (sock >= FD_SETSIZE) {
+ fprintf(stderr,
+ "Error: Failed to add new event. sock %d should be less than %d\n",
+ sock, FD_SETSIZE);
+ return -1;
+ }
+
+ dispr->events[sock].ctx = ctx;
+ dispr->events[sock].callback = cb;
+
+ FD_SET(sock, &dispr->fdset);
+ if (sock > dispr->max_sock) {
+ dispr->max_sock = sock;
+ }
+ DPRINT("Added sock %d for watching. max_sock: %d\n",
+ sock, dispr->max_sock);
+ return 0;
+}
+
+#if 0
+/* dispatcher_remove() is not currently in use but may be useful
+ * in the future. */
+static int
+dispatcher_remove(Dispatcher *dispr, int sock)
+{
+ if (sock >= FD_SETSIZE) {
+ fprintf(stderr,
+ "Error: Failed to remove event. sock %d should be less than %d\n",
+ sock, FD_SETSIZE);
+ return -1;
+ }
+
+ FD_CLR(sock, &dispr->fdset);
+ return 0;
+}
+#endif
+
+/* timeout in us */
+static int
+dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
+{
+ struct timeval tv;
+ tv.tv_sec = timeout / 1000000;
+ tv.tv_usec = timeout % 1000000;
+
+ fd_set fdset = dispr->fdset;
+
+ /* wait until some of sockets become readable. */
+ int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
+
+ if (rc == -1) {
+ vubr_die("select");
+ }
+
+ /* Timeout */
+ if (rc == 0) {
+ return 0;
+ }
+
+ /* Now call callback for every ready socket. */
+
+ int sock;
+ for (sock = 0; sock < dispr->max_sock + 1; sock++)
+ if (FD_ISSET(sock, &fdset)) {
+ Event *e = &dispr->events[sock];
+ e->callback(sock, e->ctx);
+ }
+
+ return 0;
+}
+
+typedef struct VubrVirtq {
+ int call_fd;
+ int kick_fd;
+ uint32_t size;
+ uint16_t last_avail_index;
+ uint16_t last_used_index;
+ struct vring_desc *desc;
+ struct vring_avail *avail;
+ struct vring_used *used;
+} VubrVirtq;
+
+/* Based on qemu/hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+
+enum VhostUserProtocolFeature {
+ VHOST_USER_PROTOCOL_F_MQ = 0,
+ VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
+ VHOST_USER_PROTOCOL_F_RARP = 2,
+
+ VHOST_USER_PROTOCOL_F_MAX
+};
+
+#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
+
+typedef enum VhostUserRequest {
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_DEVICE = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+ VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
+ VHOST_USER_SET_VRING_ENABLE = 18,
+ VHOST_USER_SEND_RARP = 19,
+ VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+ uint64_t guest_phys_addr;
+ uint64_t memory_size;
+ uint64_t userspace_addr;
+ uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+ uint32_t nregions;
+ uint32_t padding;
+ VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+ VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK (0x3)
+#define VHOST_USER_REPLY_MASK (0x1<<2)
+ uint32_t flags;
+ uint32_t size; /* the following payload size */
+ union {
+#define VHOST_USER_VRING_IDX_MASK (0xff)
+#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ VhostUserMemory memory;
+ } payload;
+ int fds[VHOST_MEMORY_MAX_NREGIONS];
+ int fd_num;
+} QEMU_PACKED VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION (0x1)
+
+#define MAX_NR_VIRTQUEUE (8)
+
+typedef struct VubrDevRegion {
+ /* Guest Physical address. */
+ uint64_t gpa;
+ /* Memory region size. */
+ uint64_t size;
+ /* QEMU virtual address (userspace). */
+ uint64_t qva;
+ /* Starting offset in our mmaped space. */
+ uint64_t mmap_offset;
+ /* Start address of mmaped space. */
+ uint64_t mmap_addr;
+} VubrDevRegion;
+
+typedef struct VubrDev {
+ int sock;
+ Dispatcher dispatcher;
+ uint32_t nregions;
+ VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+ VubrVirtq vq[MAX_NR_VIRTQUEUE];
+ int backend_udp_sock;
+ struct sockaddr_in backend_udp_dest;
+} VubrDev;
+
+static const char *vubr_request_str[] = {
+ [VHOST_USER_NONE] = "VHOST_USER_NONE",
+ [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
+ [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
+ [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
+ [VHOST_USER_RESET_DEVICE] = "VHOST_USER_RESET_DEVICE",
+ [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
+ [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
+ [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
+ [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
+ [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
+ [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
+ [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
+ [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
+ [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
+ [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
+ [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
+ [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
+ [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
+ [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
+ [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
+ [VHOST_USER_MAX] = "VHOST_USER_MAX",
+};
+
+static void
+print_buffer(uint8_t *buf, size_t len)
+{
+ int i;
+ printf("Raw buffer:\n");
+ for (i = 0; i < len; i++) {
+ if (i % 16 == 0) {
+ printf("\n");
+ }
+ if (i % 4 == 0) {
+ printf(" ");
+ }
+ printf("%02x ", buf[i]);
+ }
+ printf("\n............................................................\n");
+}
+
+/* Translate guest physical address to our virtual address. */
+static uint64_t
+gpa_to_va(VubrDev *dev, uint64_t guest_addr)
+{
+ int i;
+
+ /* Find matching memory region. */
+ for (i = 0; i < dev->nregions; i++) {
+ VubrDevRegion *r = &dev->regions[i];
+
+ if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
+ return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
+ }
+ }
+
+ assert(!"address not found in regions");
+ return 0;
+}
+
+/* Translate qemu virtual address to our virtual address. */
+static uint64_t
+qva_to_va(VubrDev *dev, uint64_t qemu_addr)
+{
+ int i;
+
+ /* Find matching memory region. */
+ for (i = 0; i < dev->nregions; i++) {
+ VubrDevRegion *r = &dev->regions[i];
+
+ if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
+ return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
+ }
+ }
+
+ assert(!"address not found in regions");
+ return 0;
+}
+
+static void
+vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
+{
+ char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
+ struct iovec iov = {
+ .iov_base = (char *)vmsg,
+ .iov_len = VHOST_USER_HDR_SIZE,
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = control,
+ .msg_controllen = sizeof(control),
+ };
+ size_t fd_size;
+ struct cmsghdr *cmsg;
+ int rc;
+
+ rc = recvmsg(conn_fd, &msg, 0);
+
+ if (rc <= 0) {
+ vubr_die("recvmsg");
+ }
+
+ vmsg->fd_num = 0;
+ for (cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msg, cmsg))
+ {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+ fd_size = cmsg->cmsg_len - CMSG_LEN(0);
+ vmsg->fd_num = fd_size / sizeof(int);
+ memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
+ break;
+ }
+ }
+
+ if (vmsg->size > sizeof(vmsg->payload)) {
+ fprintf(stderr,
+ "Error: too big message request: %d, size: vmsg->size: %u, "
+ "while sizeof(vmsg->payload) = %lu\n",
+ vmsg->request, vmsg->size, sizeof(vmsg->payload));
+ exit(1);
+ }
+
+ if (vmsg->size) {
+ rc = read(conn_fd, &vmsg->payload, vmsg->size);
+ if (rc <= 0) {
+ vubr_die("recvmsg");
+ }
+
+ assert(rc == vmsg->size);
+ }
+}
+
+static void
+vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
+{
+ int rc;
+
+ do {
+ rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
+ } while (rc < 0 && errno == EINTR);
+
+ if (rc < 0) {
+ vubr_die("write");
+ }
+}
+
+static void
+vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
+{
+ int slen = sizeof(struct sockaddr_in);
+
+ if (sendto(dev->backend_udp_sock, buf, len, 0,
+ (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
+ vubr_die("sendto()");
+ }
+}
+
+static int
+vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
+{
+ int slen = sizeof(struct sockaddr_in);
+ int rc;
+
+ rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
+ (struct sockaddr *) &dev->backend_udp_dest,
+ (socklen_t *)&slen);
+ if (rc == -1) {
+ vubr_die("recvfrom()");
+ }
+
+ return rc;
+}
+
+static void
+vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
+{
+ int hdrlen = sizeof(struct virtio_net_hdr_v1);
+
+ if (VHOST_USER_BRIDGE_DEBUG) {
+ print_buffer(buf, len);
+ }
+ vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
+}
+
+/* Kick the guest if necessary. */
+static void
+vubr_virtqueue_kick(VubrVirtq *vq)
+{
+ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
+ DPRINT("Kicking the guest...\n");
+ eventfd_write(vq->call_fd, 1);
+ }
+}
+
+static void
+vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
+{
+ struct vring_desc *desc = vq->desc;
+ struct vring_avail *avail = vq->avail;
+ struct vring_used *used = vq->used;
+
+ unsigned int size = vq->size;
+
+ uint16_t avail_index = atomic_mb_read(&avail->idx);
+
+ /* We check the available descriptors before posting the
+ * buffer, so here we assume that enough available
+ * descriptors. */
+ assert(vq->last_avail_index != avail_index);
+ uint16_t a_index = vq->last_avail_index % size;
+ uint16_t u_index = vq->last_used_index % size;
+ uint16_t d_index = avail->ring[a_index];
+
+ int i = d_index;
+
+ DPRINT("Post packet to guest on vq:\n");
+ DPRINT(" size = %d\n", vq->size);
+ DPRINT(" last_avail_index = %d\n", vq->last_avail_index);
+ DPRINT(" last_used_index = %d\n", vq->last_used_index);
+ DPRINT(" a_index = %d\n", a_index);
+ DPRINT(" u_index = %d\n", u_index);
+ DPRINT(" d_index = %d\n", d_index);
+ DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr);
+ DPRINT(" desc[%d].len = %d\n", i, desc[i].len);
+ DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags);
+ DPRINT(" avail->idx = %d\n", avail_index);
+ DPRINT(" used->idx = %d\n", used->idx);
+
+ if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
+ /* FIXME: we should find writable descriptor. */
+ fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
+ exit(1);
+ }
+
+ void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
+ uint32_t chunk_len = desc[i].len;
+
+ if (len <= chunk_len) {
+ memcpy(chunk_start, buf, len);
+ } else {
+ fprintf(stderr,
+ "Received too long packet from the backend. Dropping...\n");
+ return;
+ }
+
+ /* Add descriptor to the used ring. */
+ used->ring[u_index].id = d_index;
+ used->ring[u_index].len = len;
+
+ vq->last_avail_index++;
+ vq->last_used_index++;
+
+ atomic_mb_set(&used->idx, vq->last_used_index);
+
+ /* Kick the guest if necessary. */
+ vubr_virtqueue_kick(vq);
+}
+
+static int
+vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
+{
+ struct vring_desc *desc = vq->desc;
+ struct vring_avail *avail = vq->avail;
+ struct vring_used *used = vq->used;
+
+ unsigned int size = vq->size;
+
+ uint16_t a_index = vq->last_avail_index % size;
+ uint16_t u_index = vq->last_used_index % size;
+ uint16_t d_index = avail->ring[a_index];
+
+ uint32_t i, len = 0;
+ size_t buf_size = 4096;
+ uint8_t buf[4096];
+
+ DPRINT("Chunks: ");
+ i = d_index;
+ do {
+ void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
+ uint32_t chunk_len = desc[i].len;
+
+ if (len + chunk_len < buf_size) {
+ memcpy(buf + len, chunk_start, chunk_len);
+ DPRINT("%d ", chunk_len);
+ } else {
+ fprintf(stderr, "Error: too long packet. Dropping...\n");
+ break;
+ }
+
+ len += chunk_len;
+
+ if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
+ break;
+ }
+
+ i = desc[i].next;
+ } while (1);
+ DPRINT("\n");
+
+ if (!len) {
+ return -1;
+ }
+
+ /* Add descriptor to the used ring. */
+ used->ring[u_index].id = d_index;
+ used->ring[u_index].len = len;
+
+ vubr_consume_raw_packet(dev, buf, len);
+
+ return 0;
+}
+
+static void
+vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
+{
+ struct vring_avail *avail = vq->avail;
+ struct vring_used *used = vq->used;
+
+ while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
+ vubr_process_desc(dev, vq);
+ vq->last_avail_index++;
+ vq->last_used_index++;
+ }
+
+ atomic_mb_set(&used->idx, vq->last_used_index);
+}
+
+static void
+vubr_backend_recv_cb(int sock, void *ctx)
+{
+ VubrDev *dev = (VubrDev *) ctx;
+ VubrVirtq *rx_vq = &dev->vq[0];
+ uint8_t buf[4096];
+ struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
+ int hdrlen = sizeof(struct virtio_net_hdr_v1);
+ int buflen = sizeof(buf);
+ int len;
+
+ DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
+
+ uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
+
+ /* If there is no available descriptors, just do nothing.
+ * The buffer will be handled by next arrived UDP packet,
+ * or next kick on receive virtq. */
+ if (rx_vq->last_avail_index == avail_index) {
+ DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
+ return;
+ }
+
+ len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
+
+ *hdr = (struct virtio_net_hdr_v1) { };
+ hdr->num_buffers = 1;
+ vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
+}
+
+static void
+vubr_kick_cb(int sock, void *ctx)
+{
+ VubrDev *dev = (VubrDev *) ctx;
+ eventfd_t kick_data;
+ ssize_t rc;
+
+ rc = eventfd_read(sock, &kick_data);
+ if (rc == -1) {
+ vubr_die("eventfd_read()");
+ } else {
+ DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
+ vubr_process_avail(dev, &dev->vq[1]);
+ }
+}
+
+static int
+vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("Function %s() not implemented yet.\n", __func__);
+ return 0;
+}
+
+static int
+vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ vmsg->payload.u64 =
+ ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
+ (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+ (1ULL << VIRTIO_NET_F_CTRL_RX) |
+ (1ULL << VHOST_F_LOG_ALL));
+ vmsg->size = sizeof(vmsg->payload.u64);
+
+ DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ /* reply */
+ return 1;
+}
+
+static int
+vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+ return 0;
+}
+
+static int
+vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ return 0;
+}
+
+static int
+vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("Function %s() not implemented yet.\n", __func__);
+ return 0;
+}
+
+static int
+vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ int i;
+ VhostUserMemory *memory = &vmsg->payload.memory;
+ dev->nregions = memory->nregions;
+
+ DPRINT("Nregions: %d\n", memory->nregions);
+ for (i = 0; i < dev->nregions; i++) {
+ void *mmap_addr;
+ VhostUserMemoryRegion *msg_region = &memory->regions[i];
+ VubrDevRegion *dev_region = &dev->regions[i];
+
+ DPRINT("Region %d\n", i);
+ DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
+ msg_region->guest_phys_addr);
+ DPRINT(" memory_size: 0x%016"PRIx64"\n",
+ msg_region->memory_size);
+ DPRINT(" userspace_addr 0x%016"PRIx64"\n",
+ msg_region->userspace_addr);
+ DPRINT(" mmap_offset 0x%016"PRIx64"\n",
+ msg_region->mmap_offset);
+
+ dev_region->gpa = msg_region->guest_phys_addr;
+ dev_region->size = msg_region->memory_size;
+ dev_region->qva = msg_region->userspace_addr;
+ dev_region->mmap_offset = msg_region->mmap_offset;
+
+ /* We don't use offset argument of mmap() since the
+ * mapped address has to be page aligned, and we use huge
+ * pages. */
+ mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ vmsg->fds[i], 0);
+
+ if (mmap_addr == MAP_FAILED) {
+ vubr_die("mmap");
+ }
+
+ dev_region->mmap_addr = (uint64_t) mmap_addr;
+ DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr);
+ }
+
+ return 0;
+}
+
+static int
+vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("Function %s() not implemented yet.\n", __func__);
+ return 0;
+}
+
+static int
+vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("Function %s() not implemented yet.\n", __func__);
+ return 0;
+}
+
+static int
+vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ unsigned int index = vmsg->payload.state.index;
+ unsigned int num = vmsg->payload.state.num;
+
+ DPRINT("State.index: %d\n", index);
+ DPRINT("State.num: %d\n", num);
+ dev->vq[index].size = num;
+ return 0;
+}
+
+static int
+vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ struct vhost_vring_addr *vra = &vmsg->payload.addr;
+ unsigned int index = vra->index;
+ VubrVirtq *vq = &dev->vq[index];
+
+ DPRINT("vhost_vring_addr:\n");
+ DPRINT(" index: %d\n", vra->index);
+ DPRINT(" flags: %d\n", vra->flags);
+ DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr);
+ DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr);
+ DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr);
+ DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr);
+
+ vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
+ vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
+ vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
+
+ DPRINT("Setting virtq addresses:\n");
+ DPRINT(" vring_desc at %p\n", vq->desc);
+ DPRINT(" vring_used at %p\n", vq->used);
+ DPRINT(" vring_avail at %p\n", vq->avail);
+
+ vq->last_used_index = vq->used->idx;
+ return 0;
+}
+
+static int
+vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ unsigned int index = vmsg->payload.state.index;
+ unsigned int num = vmsg->payload.state.num;
+
+ DPRINT("State.index: %d\n", index);
+ DPRINT("State.num: %d\n", num);
+ dev->vq[index].last_avail_index = num;
+
+ return 0;
+}
+
+static int
+vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("Function %s() not implemented yet.\n", __func__);
+ return 0;
+}
+
+static int
+vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ uint64_t u64_arg = vmsg->payload.u64;
+ int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
+
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
+ assert(vmsg->fd_num == 1);
+
+ dev->vq[index].kick_fd = vmsg->fds[0];
+ DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
+
+ if (index % 2 == 1) {
+ /* TX queue. */
+ dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
+ dev, vubr_kick_cb);
+
+ DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
+ dev->vq[index].kick_fd, index);
+ }
+ return 0;
+}
+
+static int
+vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ uint64_t u64_arg = vmsg->payload.u64;
+ int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
+
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+ assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
+ assert(vmsg->fd_num == 1);
+
+ dev->vq[index].call_fd = vmsg->fds[0];
+ DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
+
+ return 0;
+}
+
+static int
+vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+ return 0;
+}
+
+static int
+vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ /* FIXME: unimplented */
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+ return 0;
+}
+
+static int
+vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ /* FIXME: unimplented */
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+ return 0;
+}
+
+static int
+vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("Function %s() not implemented yet.\n", __func__);
+ return 0;
+}
+
+static int
+vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("Function %s() not implemented yet.\n", __func__);
+ return 0;
+}
+
+static int
+vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("Function %s() not implemented yet.\n", __func__);
+ return 0;
+}
+
+static int
+vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
+{
+ /* Print out generic part of the request. */
+ DPRINT(
+ "================== Vhost user message from QEMU ==================\n");
+ DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
+ vmsg->request);
+ DPRINT("Flags: 0x%x\n", vmsg->flags);
+ DPRINT("Size: %d\n", vmsg->size);
+
+ if (vmsg->fd_num) {
+ int i;
+ DPRINT("Fds:");
+ for (i = 0; i < vmsg->fd_num; i++) {
+ DPRINT(" %d", vmsg->fds[i]);
+ }
+ DPRINT("\n");
+ }
+
+ switch (vmsg->request) {
+ case VHOST_USER_NONE:
+ return vubr_none_exec(dev, vmsg);
+ case VHOST_USER_GET_FEATURES:
+ return vubr_get_features_exec(dev, vmsg);
+ case VHOST_USER_SET_FEATURES:
+ return vubr_set_features_exec(dev, vmsg);
+ case VHOST_USER_SET_OWNER:
+ return vubr_set_owner_exec(dev, vmsg);
+ case VHOST_USER_RESET_DEVICE:
+ return vubr_reset_device_exec(dev, vmsg);
+ case VHOST_USER_SET_MEM_TABLE:
+ return vubr_set_mem_table_exec(dev, vmsg);
+ case VHOST_USER_SET_LOG_BASE:
+ return vubr_set_log_base_exec(dev, vmsg);
+ case VHOST_USER_SET_LOG_FD:
+ return vubr_set_log_fd_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_NUM:
+ return vubr_set_vring_num_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_ADDR:
+ return vubr_set_vring_addr_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_BASE:
+ return vubr_set_vring_base_exec(dev, vmsg);
+ case VHOST_USER_GET_VRING_BASE:
+ return vubr_get_vring_base_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_KICK:
+ return vubr_set_vring_kick_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_CALL:
+ return vubr_set_vring_call_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_ERR:
+ return vubr_set_vring_err_exec(dev, vmsg);
+ case VHOST_USER_GET_PROTOCOL_FEATURES:
+ return vubr_get_protocol_features_exec(dev, vmsg);
+ case VHOST_USER_SET_PROTOCOL_FEATURES:
+ return vubr_set_protocol_features_exec(dev, vmsg);
+ case VHOST_USER_GET_QUEUE_NUM:
+ return vubr_get_queue_num_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_ENABLE:
+ return vubr_set_vring_enable_exec(dev, vmsg);
+ case VHOST_USER_SEND_RARP:
+ return vubr_send_rarp_exec(dev, vmsg);
+
+ case VHOST_USER_MAX:
+ assert(vmsg->request != VHOST_USER_MAX);
+ }
+ return 0;
+}
+
+static void
+vubr_receive_cb(int sock, void *ctx)
+{
+ VubrDev *dev = (VubrDev *) ctx;
+ VhostUserMsg vmsg;
+ int reply_requested;
+
+ vubr_message_read(sock, &vmsg);
+ reply_requested = vubr_execute_request(dev, &vmsg);
+ if (reply_requested) {
+ /* Set the version in the flags when sending the reply */
+ vmsg.flags &= ~VHOST_USER_VERSION_MASK;
+ vmsg.flags |= VHOST_USER_VERSION;
+ vmsg.flags |= VHOST_USER_REPLY_MASK;
+ vubr_message_write(sock, &vmsg);
+ }
+}
+
+static void
+vubr_accept_cb(int sock, void *ctx)
+{
+ VubrDev *dev = (VubrDev *)ctx;
+ int conn_fd;
+ struct sockaddr_un un;
+ socklen_t len = sizeof(un);
+
+ conn_fd = accept(sock, (struct sockaddr *) &un, &len);
+ if (conn_fd == -1) {
+ vubr_die("accept()");
+ }
+ DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
+ dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
+}
+
+static VubrDev *
+vubr_new(const char *path)
+{
+ VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
+ dev->nregions = 0;
+ int i;
+ struct sockaddr_un un;
+ size_t len;
+
+ for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
+ dev->vq[i] = (VubrVirtq) {
+ .call_fd = -1, .kick_fd = -1,
+ .size = 0,
+ .last_avail_index = 0, .last_used_index = 0,
+ .desc = 0, .avail = 0, .used = 0,
+ };
+ }
+
+ /* Get a UNIX socket. */
+ dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (dev->sock == -1) {
+ vubr_die("socket");
+ }
+
+ un.sun_family = AF_UNIX;
+ strcpy(un.sun_path, path);
+ len = sizeof(un.sun_family) + strlen(path);
+ unlink(path);
+
+ if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
+ vubr_die("bind");
+ }
+
+ if (listen(dev->sock, 1) == -1) {
+ vubr_die("listen");
+ }
+
+ dispatcher_init(&dev->dispatcher);
+ dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev,
+ vubr_accept_cb);
+
+ DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
+ return dev;
+}
+
+static void
+vubr_backend_udp_setup(VubrDev *dev,
+ const char *local_host,
+ uint16_t local_port,
+ const char *dest_host,
+ uint16_t dest_port)
+{
+ int sock;
+ struct sockaddr_in si_local = {
+ .sin_family = AF_INET,
+ .sin_port = htons(local_port),
+ };
+
+ if (inet_aton(local_host, &si_local.sin_addr) == 0) {
+ fprintf(stderr, "inet_aton() failed.\n");
+ exit(1);
+ }
+
+ /* setup destination for sends */
+ dev->backend_udp_dest = (struct sockaddr_in) {
+ .sin_family = AF_INET,
+ .sin_port = htons(dest_port),
+ };
+ if (inet_aton(dest_host, &dev->backend_udp_dest.sin_addr) == 0) {
+ fprintf(stderr, "inet_aton() failed.\n");
+ exit(1);
+ }
+
+ sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (sock == -1) {
+ vubr_die("socket");
+ }
+
+ if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
+ vubr_die("bind");
+ }
+
+ dev->backend_udp_sock = sock;
+ dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
+ DPRINT("Waiting for data from udp backend on %s:%d...\n",
+ local_host, local_port);
+}
+
+static void
+vubr_run(VubrDev *dev)
+{
+ while (1) {
+ /* timeout 200ms */
+ dispatcher_wait(&dev->dispatcher, 200000);
+ /* Here one can try polling strategy. */
+ }
+}
+
+int
+main(int argc, char *argv[])
+{
+ VubrDev *dev;
+
+ dev = vubr_new("/tmp/vubr.sock");
+ if (!dev) {
+ return 1;
+ }
+
+ vubr_backend_udp_setup(dev,
+ "127.0.0.1", 4444,
+ "127.0.0.1", 5555);
+ vubr_run(dev);
+ return 0;
+}
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index a74c934cc0..b6dde753f8 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -98,7 +98,7 @@ typedef struct VhostUserMsg {
struct vhost_vring_state state;
struct vhost_vring_addr addr;
VhostUserMemory memory;
- };
+ } payload;
} QEMU_PACKED VhostUserMsg;
static VhostUserMsg m __attribute__ ((unused));
@@ -242,23 +242,23 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
case VHOST_USER_GET_FEATURES:
/* send back features to qemu */
msg.flags |= VHOST_USER_REPLY_MASK;
- msg.size = sizeof(m.u64);
- msg.u64 = 0x1ULL << VHOST_F_LOG_ALL |
+ msg.size = sizeof(m.payload.u64);
+ msg.payload.u64 = 0x1ULL << VHOST_F_LOG_ALL |
0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
p = (uint8_t *) &msg;
qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
break;
case VHOST_USER_SET_FEATURES:
- g_assert_cmpint(msg.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES),
+ g_assert_cmpint(msg.payload.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES),
!=, 0ULL);
break;
case VHOST_USER_GET_PROTOCOL_FEATURES:
/* send back features to qemu */
msg.flags |= VHOST_USER_REPLY_MASK;
- msg.size = sizeof(m.u64);
- msg.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
+ msg.size = sizeof(m.payload.u64);
+ msg.payload.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
p = (uint8_t *) &msg;
qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
break;
@@ -266,15 +266,15 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
case VHOST_USER_GET_VRING_BASE:
/* send back vring base to qemu */
msg.flags |= VHOST_USER_REPLY_MASK;
- msg.size = sizeof(m.state);
- msg.state.num = 0;
+ msg.size = sizeof(m.payload.state);
+ msg.payload.state.num = 0;
p = (uint8_t *) &msg;
qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
break;
case VHOST_USER_SET_MEM_TABLE:
/* received the mem table */
- memcpy(&s->memory, &msg.memory, sizeof(msg.memory));
+ memcpy(&s->memory, &msg.payload.memory, sizeof(msg.payload.memory));
s->fds_num = qemu_chr_fe_get_msgfds(chr, s->fds, G_N_ELEMENTS(s->fds));
/* signal the test that it can continue */
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 13942694cc..c37acbe58e 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -26,7 +26,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
void *ptr1;
if (ptr == MAP_FAILED) {
- return NULL;
+ return MAP_FAILED;
}
/* Make sure align is a power of 2 */
@@ -41,7 +41,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
fd, 0);
if (ptr1 == MAP_FAILED) {
munmap(ptr, total);
- return NULL;
+ return MAP_FAILED;
}
ptr += offset;