aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2016-06-17 11:25:46 +0100
committerPeter Maydell <peter.maydell@linaro.org>2016-06-17 11:25:46 +0100
commit7263a903c361edd174946a4708374f5aa6c6b834 (patch)
tree1154bb6eb2728a691a9c8562bed718040909891b
parent585fcd4b11070b3220685fc54ecca1991cdeb161 (diff)
parent874a2358307ce9d0466b0f559d33d422f9d4976d (diff)
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, pci, virtio: new features, cleanups, fixes Beginning of reconnect support for vhost-user. Misc cleanups and fixes. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Fri 17 Jun 2016 01:28:39 BST # gpg: using RSA key 0x281F0DB8D28D5469 # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * remotes/mst/tags/for_upstream: MAINTAINERS: add Marcel to PCI msi_init: change return value to 0 on success fix some coding style problems pci core: assert ENOSPC when add capability test: start vhost-user reconnect test tests: append i386 tests vhost-net: save & restore vring enable state vhost-net: save & restore vhost-user acked features vhost-net: do not crash if backend is not present vhost-user: disconnect on start failure qemu-char: add qemu_chr_disconnect to close a fd accepted by listen fd tests/vhost-user-bridge: workaround stale vring base tests/vhost-user-bridge: add client mode vhost-user: add ability to know vhost-user backend disconnection pci: fix pci_requester_id() Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Conflicts: tests/Makefile.include
-rw-r--r--MAINTAINERS1
-rw-r--r--hw/i386/kvm/pci-assign.c2
-rw-r--r--hw/net/vhost_net.c45
-rw-r--r--hw/net/vmxnet3.c2
-rw-r--r--hw/pci-bridge/ioh3420.c7
-rw-r--r--hw/pci-bridge/pci_bridge_dev.c4
-rw-r--r--hw/pci-bridge/xio3130_downstream.c6
-rw-r--r--hw/pci-bridge/xio3130_upstream.c3
-rw-r--r--hw/pci/msi.c18
-rw-r--r--hw/pci/pci.c82
-rw-r--r--include/hw/pci/pci.h26
-rw-r--r--include/net/net.h1
-rw-r--r--include/net/vhost-user.h1
-rw-r--r--include/net/vhost_net.h3
-rw-r--r--include/sysemu/char.h7
-rw-r--r--net/vhost-user.c32
-rw-r--r--qemu-char.c8
-rw-r--r--tests/Makefile.include2
-rw-r--r--tests/vhost-user-bridge.c51
-rw-r--r--tests/vhost-user-test.c136
20 files changed, 391 insertions, 46 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index df990a8ff6..fe2279e648 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -780,6 +780,7 @@ F: hw/ipack/
PCI
M: Michael S. Tsirkin <mst@redhat.com>
+M: Marcel Apfelbaum <marcel@redhat.com>
S: Supported
F: include/hw/pci/*
F: hw/misc/pci-testdev.c
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index dd9e73b6f5..f9c901471d 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1481,7 +1481,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp)
* error bits, leave the rest. */
status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS);
status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN);
- status |= pci_requester_id(pci_dev);
+ status |= pci_get_bdf(pci_dev);
status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL |
PCI_X_STATUS_SPL_ERR);
pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status);
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 6e1032fc18..50f4dcd655 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -120,6 +120,11 @@ uint64_t vhost_net_get_max_queues(VHostNetState *net)
return net->dev.max_queues;
}
+uint64_t vhost_net_get_acked_features(VHostNetState *net)
+{
+ return net->dev.acked_features;
+}
+
static int vhost_net_get_fd(NetClientState *backend)
{
switch (backend->info->type) {
@@ -136,6 +141,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
int r;
bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
struct vhost_net *net = g_malloc(sizeof *net);
+ uint64_t features = 0;
if (!options->net_backend) {
fprintf(stderr, "vhost-net requires net backend to be setup\n");
@@ -183,8 +189,21 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
goto fail;
}
}
+
/* Set sane init value. Override when guest acks. */
- vhost_net_ack_features(net, 0);
+ if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) {
+ features = vhost_user_get_acked_features(net->nc);
+ if (~net->dev.features & features) {
+ fprintf(stderr, "vhost lacks feature mask %" PRIu64
+ " for backend\n",
+ (uint64_t)(~net->dev.features & features));
+ vhost_dev_cleanup(&net->dev);
+ goto fail;
+ }
+ }
+
+ vhost_net_ack_features(net, features);
+
return net;
fail:
g_free(net);
@@ -310,6 +329,15 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
if (r < 0) {
goto err_start;
}
+
+ if (ncs[i].peer->vring_enable) {
+ /* restore vring enable state */
+ r = vhost_set_vring_enable(ncs[i].peer, ncs[i].peer->vring_enable);
+
+ if (r < 0) {
+ goto err_start;
+ }
+ }
}
return 0;
@@ -401,8 +429,15 @@ VHostNetState *get_vhost_net(NetClientState *nc)
int vhost_set_vring_enable(NetClientState *nc, int enable)
{
VHostNetState *net = get_vhost_net(nc);
- const VhostOps *vhost_ops = net->dev.vhost_ops;
+ const VhostOps *vhost_ops;
+ nc->vring_enable = enable;
+
+ if (!net) {
+ return 0;
+ }
+
+ vhost_ops = net->dev.vhost_ops;
if (vhost_ops->vhost_set_vring_enable) {
return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
}
@@ -442,10 +477,16 @@ uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
{
return features;
}
+
void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
{
}
+uint64_t vhost_net_get_acked_features(VHostNetState *net)
+{
+ return 0;
+}
+
bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
return false;
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 16645e6c23..d97897670d 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -348,7 +348,7 @@ typedef struct {
/* Interrupt management */
/*
- *This function returns sign whether interrupt line is in asserted state
+ * This function returns sign whether interrupt line is in asserted state
* This depends on the type of interrupt used. For INTX interrupt line will
* be asserted until explicit deassertion, for MSI(X) interrupt line will
* be deasserted automatically due to notification semantics of the MSI(X)
diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index 0937fa34be..b4a7806e2e 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -106,12 +106,14 @@ static int ioh3420_initfn(PCIDevice *d)
if (rc < 0) {
goto err_bridge;
}
+
rc = msi_init(d, IOH_EP_MSI_OFFSET, IOH_EP_MSI_NR_VECTOR,
IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT);
if (rc < 0) {
goto err_bridge;
}
+
rc = pcie_cap_init(d, IOH_EP_EXP_OFFSET, PCI_EXP_TYPE_ROOT_PORT, p->port);
if (rc < 0) {
goto err_msi;
@@ -120,18 +122,21 @@ static int ioh3420_initfn(PCIDevice *d)
pcie_cap_arifwd_init(d);
pcie_cap_deverr_init(d);
pcie_cap_slot_init(d, s->slot);
+ pcie_cap_root_init(d);
+
pcie_chassis_create(s->chassis);
rc = pcie_chassis_add_slot(s);
if (rc < 0) {
goto err_pcie_cap;
}
- pcie_cap_root_init(d);
+
rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
if (rc < 0) {
goto err;
}
pcie_aer_root_init(d);
ioh3420_aer_vector_update(d);
+
return 0;
err:
diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
index 7b582e96ac..41ca47b15a 100644
--- a/hw/pci-bridge/pci_bridge_dev.c
+++ b/hw/pci-bridge/pci_bridge_dev.c
@@ -68,10 +68,12 @@ static int pci_bridge_dev_initfn(PCIDevice *dev)
/* MSI is not applicable without SHPC */
bridge_dev->flags &= ~(1 << PCI_BRIDGE_DEV_F_MSI_REQ);
}
+
err = slotid_cap_init(dev, 0, bridge_dev->chassis_nr, 0);
if (err) {
goto slotid_error;
}
+
if ((bridge_dev->flags & (1 << PCI_BRIDGE_DEV_F_MSI_REQ)) &&
msi_nonbroken) {
err = msi_init(dev, 0, 1, true, true);
@@ -79,6 +81,7 @@ static int pci_bridge_dev_initfn(PCIDevice *dev)
goto msi_error;
}
}
+
if (shpc_present(dev)) {
/* TODO: spec recommends using 64 bit prefetcheable BAR.
* Check whether that works well. */
@@ -86,6 +89,7 @@ static int pci_bridge_dev_initfn(PCIDevice *dev)
PCI_BASE_ADDRESS_MEM_TYPE_64, &bridge_dev->bar);
}
return 0;
+
msi_error:
slotid_cap_cleanup(dev);
slotid_error:
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index cf1ee63aba..e6d653de4f 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -70,11 +70,13 @@ static int xio3130_downstream_initfn(PCIDevice *d)
if (rc < 0) {
goto err_bridge;
}
+
rc = pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET,
XIO3130_SSVID_SVID, XIO3130_SSVID_SSID);
if (rc < 0) {
goto err_bridge;
}
+
rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_DOWNSTREAM,
p->port);
if (rc < 0) {
@@ -83,12 +85,14 @@ static int xio3130_downstream_initfn(PCIDevice *d)
pcie_cap_flr_init(d);
pcie_cap_deverr_init(d);
pcie_cap_slot_init(d, s->slot);
+ pcie_cap_arifwd_init(d);
+
pcie_chassis_create(s->chassis);
rc = pcie_chassis_add_slot(s);
if (rc < 0) {
goto err_pcie_cap;
}
- pcie_cap_arifwd_init(d);
+
rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
if (rc < 0) {
goto err;
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index 164ef58c46..d97684474f 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -66,11 +66,13 @@ static int xio3130_upstream_initfn(PCIDevice *d)
if (rc < 0) {
goto err_bridge;
}
+
rc = pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET,
XIO3130_SSVID_SVID, XIO3130_SSVID_SSID);
if (rc < 0) {
goto err_bridge;
}
+
rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_UPSTREAM,
p->port);
if (rc < 0) {
@@ -78,6 +80,7 @@ static int xio3130_upstream_initfn(PCIDevice *d)
}
pcie_cap_flr_init(d);
pcie_cap_deverr_init(d);
+
rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
if (rc < 0) {
goto err;
diff --git a/hw/pci/msi.c b/hw/pci/msi.c
index a87ef4d752..ed792251dd 100644
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -165,6 +165,21 @@ bool msi_enabled(const PCIDevice *dev)
PCI_MSI_FLAGS_ENABLE);
}
+/*
+ * Make PCI device @dev MSI-capable.
+ * Non-zero @offset puts capability MSI at that offset in PCI config
+ * space.
+ * @nr_vectors is the number of MSI vectors (1, 2, 4, 8, 16 or 32).
+ * If @msi64bit, make the device capable of sending a 64-bit message
+ * address.
+ * If @msi_per_vector_mask, make the device support per-vector masking.
+ * Return 0 on success, return -errno on error.
+ *
+ * -ENOTSUP means lacking msi support for a msi-capable platform.
+ * -EINVAL means capability overlap, happens when @offset is non-zero,
+ * also means a programming error, except device assignment, which can check
+ * if a real HW is broken.
+ */
int msi_init(struct PCIDevice *dev, uint8_t offset,
unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask)
{
@@ -220,7 +235,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
pci_set_long(dev->wmask + msi_mask_off(dev, msi64bit),
0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors));
}
- return config_offset;
+
+ return 0;
}
void msi_uninit(struct PCIDevice *dev)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index bb605efae0..4b585f47b6 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -836,6 +836,81 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
address_space_destroy(&pci_dev->bus_master_as);
}
+/* Extract PCIReqIDCache into BDF format */
+static uint16_t pci_req_id_cache_extract(PCIReqIDCache *cache)
+{
+ uint8_t bus_n;
+ uint16_t result;
+
+ switch (cache->type) {
+ case PCI_REQ_ID_BDF:
+ result = pci_get_bdf(cache->dev);
+ break;
+ case PCI_REQ_ID_SECONDARY_BUS:
+ bus_n = pci_bus_num(cache->dev->bus);
+ result = PCI_BUILD_BDF(bus_n, 0);
+ break;
+ default:
+ error_printf("Invalid PCI requester ID cache type: %d\n",
+ cache->type);
+ exit(1);
+ break;
+ }
+
+ return result;
+}
+
+/* Parse bridges up to the root complex and return requester ID
+ * cache for specific device. For full PCIe topology, the cache
+ * result would be exactly the same as getting BDF of the device.
+ * However, several tricks are required when system mixed up with
+ * legacy PCI devices and PCIe-to-PCI bridges.
+ *
+ * Here we cache the proxy device (and type) not requester ID since
+ * bus number might change from time to time.
+ */
+static PCIReqIDCache pci_req_id_cache_get(PCIDevice *dev)
+{
+ PCIDevice *parent;
+ PCIReqIDCache cache = {
+ .dev = dev,
+ .type = PCI_REQ_ID_BDF,
+ };
+
+ while (!pci_bus_is_root(dev->bus)) {
+ /* We are under PCI/PCIe bridges */
+ parent = dev->bus->parent_dev;
+ if (pci_is_express(parent)) {
+ if (pcie_cap_get_type(parent) == PCI_EXP_TYPE_PCI_BRIDGE) {
+ /* When we pass through PCIe-to-PCI/PCIX bridges, we
+ * override the requester ID using secondary bus
+ * number of parent bridge with zeroed devfn
+ * (pcie-to-pci bridge spec chap 2.3). */
+ cache.type = PCI_REQ_ID_SECONDARY_BUS;
+ cache.dev = dev;
+ }
+ } else {
+ /* Legacy PCI, override requester ID with the bridge's
+ * BDF upstream. When the root complex connects to
+ * legacy PCI devices (including buses), it can only
+ * obtain requester ID info from directly attached
+ * devices. If devices are attached under bridges, only
+ * the requester ID of the bridge that is directly
+ * attached to the root complex can be recognized. */
+ cache.type = PCI_REQ_ID_BDF;
+ cache.dev = parent;
+ }
+ dev = parent;
+ }
+
+ return cache;
+}
+
+uint16_t pci_requester_id(PCIDevice *dev)
+{
+ return pci_req_id_cache_extract(&dev->requester_id_cache);
+}
+
/* -1 for devfn means auto assign */
static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
const char *name, int devfn,
@@ -885,6 +960,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
}
pci_dev->devfn = devfn;
+ pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
dma_as = pci_device_iommu_address_space(pci_dev);
memory_region_init_alias(&pci_dev->bus_master_enable_region,
@@ -2152,10 +2228,8 @@ int pci_add_capability2(PCIDevice *pdev, uint8_t cap_id,
if (!offset) {
offset = pci_find_space(pdev, size);
- if (!offset) {
- error_setg(errp, "out of PCI config space");
- return -ENOSPC;
- }
+ /* out of PCI config space is programming error */
+ assert(offset);
} else {
/* Verify that capabilities don't overlap. Note: device assignment
* depends on this check to verify that the device is not broken.
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 4420f47598..9ed1624f09 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -15,6 +15,7 @@
#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn) ((devfn) & 0x07)
+#define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn))
#define PCI_SLOT_MAX 32
#define PCI_FUNC_MAX 8
@@ -230,6 +231,20 @@ typedef void (*MSIVectorPollNotifier)(PCIDevice *dev,
unsigned int vector_start,
unsigned int vector_end);
+enum PCIReqIDType {
+ PCI_REQ_ID_INVALID = 0,
+ PCI_REQ_ID_BDF,
+ PCI_REQ_ID_SECONDARY_BUS,
+ PCI_REQ_ID_MAX,
+};
+typedef enum PCIReqIDType PCIReqIDType;
+
+struct PCIReqIDCache {
+ PCIDevice *dev;
+ PCIReqIDType type;
+};
+typedef struct PCIReqIDCache PCIReqIDCache;
+
struct PCIDevice {
DeviceState qdev;
@@ -252,6 +267,11 @@ struct PCIDevice {
/* the following fields are read only */
PCIBus *bus;
int32_t devfn;
+ /* Cached device to fetch requester ID from, to avoid the PCI
+ * tree walking every time we invoke PCI request (e.g.,
+ * MSI). For conventional PCI root complex, this field is
+ * meaningless. */
+ PCIReqIDCache requester_id_cache;
char name[64];
PCIIORegion io_regions[PCI_NUM_REGIONS];
AddressSpace bus_master_as;
@@ -692,11 +712,13 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
}
-static inline uint16_t pci_requester_id(PCIDevice *dev)
+static inline uint16_t pci_get_bdf(PCIDevice *dev)
{
- return (pci_bus_num(dev->bus) << 8) | dev->devfn;
+ return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}
+uint16_t pci_requester_id(PCIDevice *dev);
+
/* DMA access functions */
static inline AddressSpace *pci_get_address_space(PCIDevice *dev)
{
diff --git a/include/net/net.h b/include/net/net.h
index a69e382ba7..a5c5095154 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -99,6 +99,7 @@ struct NetClientState {
NetClientDestructor *destructor;
unsigned int queue_index;
unsigned rxfilter_notify_enabled:1;
+ int vring_enable;
QTAILQ_HEAD(NetFilterHead, NetFilterState) filters;
};
diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
index 85109f63aa..efae35d57e 100644
--- a/include/net/vhost-user.h
+++ b/include/net/vhost-user.h
@@ -13,5 +13,6 @@
struct vhost_net;
struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
+uint64_t vhost_user_get_acked_features(NetClientState *nc);
#endif /* VHOST_USER_H_ */
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 3389b410d8..0bd48770d8 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -31,4 +31,7 @@ int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr);
VHostNetState *get_vhost_net(NetClientState *nc);
int vhost_set_vring_enable(NetClientState * nc, int enable);
+
+uint64_t vhost_net_get_acked_features(VHostNetState *net);
+
#endif
diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index 372a6fd40b..1eb2d0f309 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h
@@ -75,6 +75,7 @@ struct CharDriverState {
IOReadHandler *chr_read;
void *handler_opaque;
void (*chr_close)(struct CharDriverState *chr);
+ void (*chr_disconnect)(struct CharDriverState *chr);
void (*chr_accept_input)(struct CharDriverState *chr);
void (*chr_set_echo)(struct CharDriverState *chr, bool echo);
void (*chr_set_fe_open)(struct CharDriverState *chr, int fe_open);
@@ -143,6 +144,12 @@ void qemu_chr_parse_common(QemuOpts *opts, ChardevCommon *backend);
*/
CharDriverState *qemu_chr_new(const char *label, const char *filename,
void (*init)(struct CharDriverState *s));
+/**
+ * @qemu_chr_disconnect:
+ *
+ * Close a fd accpeted by character backend.
+ */
+void qemu_chr_disconnect(CharDriverState *chr);
/**
* @qemu_chr_new_noreplay:
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 1b9e73a2dc..d72ce9b490 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -22,6 +22,8 @@ typedef struct VhostUserState {
NetClientState nc;
CharDriverState *chr;
VHostNetState *vhost_net;
+ int watch;
+ uint64_t acked_features;
} VhostUserState;
typedef struct VhostUserChardevProps {
@@ -36,6 +38,13 @@ VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
return s->vhost_net;
}
+uint64_t vhost_user_get_acked_features(NetClientState *nc)
+{
+ VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
+ assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+ return s->acked_features;
+}
+
static int vhost_user_running(VhostUserState *s)
{
return (s->vhost_net) ? 1 : 0;
@@ -55,6 +64,8 @@ static void vhost_user_stop(int queues, NetClientState *ncs[])
}
if (s->vhost_net) {
+ /* save acked features */
+ s->acked_features = vhost_net_get_acked_features(s->vhost_net);
vhost_net_cleanup(s->vhost_net);
s->vhost_net = NULL;
}
@@ -167,6 +178,20 @@ static NetClientInfo net_vhost_user_info = {
.has_ufo = vhost_user_has_ufo,
};
+static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond,
+ void *opaque)
+{
+ VhostUserState *s = opaque;
+ uint8_t buf[1];
+
+ /* We don't actually want to read anything, but CHR_EVENT_CLOSED will be
+ * raised as a side-effect of the read.
+ */
+ qemu_chr_fe_read_all(s->chr, buf, sizeof(buf));
+
+ return FALSE;
+}
+
static void net_vhost_user_event(void *opaque, int event)
{
const char *name = opaque;
@@ -184,14 +209,19 @@ static void net_vhost_user_event(void *opaque, int event)
trace_vhost_user_event(s->chr->label, event);
switch (event) {
case CHR_EVENT_OPENED:
+ s->watch = qemu_chr_fe_add_watch(s->chr, G_IO_HUP,
+ net_vhost_user_watch, s);
if (vhost_user_start(queues, ncs) < 0) {
- exit(1);
+ qemu_chr_disconnect(s->chr);
+ return;
}
qmp_set_link(name, true, &err);
break;
case CHR_EVENT_CLOSED:
qmp_set_link(name, false, &err);
vhost_user_stop(queues, ncs);
+ g_source_remove(s->watch);
+ s->watch = 0;
break;
}
diff --git a/qemu-char.c b/qemu-char.c
index c926e9a379..84f49acbac 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -4011,6 +4011,13 @@ void qemu_chr_fe_release(CharDriverState *s)
s->avail_connections++;
}
+void qemu_chr_disconnect(CharDriverState *chr)
+{
+ if (chr->chr_disconnect) {
+ chr->chr_disconnect(chr);
+ }
+}
+
static void qemu_chr_free_common(CharDriverState *chr)
{
g_free(chr->filename);
@@ -4388,6 +4395,7 @@ static CharDriverState *qmp_chardev_open_socket(const char *id,
chr->chr_write = tcp_chr_write;
chr->chr_sync_read = tcp_chr_sync_read;
chr->chr_close = tcp_chr_close;
+ chr->chr_disconnect = tcp_chr_disconnect;
chr->get_msgfds = tcp_get_msgfds;
chr->set_msgfds = tcp_set_msgfds;
chr->chr_add_client = tcp_chr_add_client;
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 6135875c37..bf620b8dd4 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -235,7 +235,7 @@ check-qtest-i386-y += tests/test-netfilter$(EXESUF)
check-qtest-i386-y += tests/test-filter-mirror$(EXESUF)
check-qtest-i386-y += tests/test-filter-redirector$(EXESUF)
check-qtest-i386-y += tests/postcopy-test$(EXESUF)
-check-qtest-x86_64-y = $(check-qtest-i386-y)
+check-qtest-x86_64-y += $(check-qtest-i386-y)
gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
check-qtest-mips-y = tests/endianness-test$(EXESUF)
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 57c8f91c5e..45fa2b6148 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -945,6 +945,13 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
DPRINT(" vring_avail at %p\n", vq->avail);
vq->last_used_index = vq->used->idx;
+
+ if (vq->last_avail_index != vq->used->idx) {
+ DPRINT("Last avail index != used index: %d != %d, resuming",
+ vq->last_avail_index, vq->used->idx);
+ vq->last_avail_index = vq->used->idx;
+ }
+
return 0;
}
@@ -1203,12 +1210,13 @@ vubr_accept_cb(int sock, void *ctx)
}
static VubrDev *
-vubr_new(const char *path)
+vubr_new(const char *path, bool client)
{
VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
dev->nregions = 0;
int i;
struct sockaddr_un un;
+ CallbackFunc cb;
size_t len;
for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
@@ -1237,21 +1245,30 @@ vubr_new(const char *path)
un.sun_family = AF_UNIX;
strcpy(un.sun_path, path);
len = sizeof(un.sun_family) + strlen(path);
- unlink(path);
- if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
- vubr_die("bind");
- }
+ if (!client) {
+ unlink(path);
+
+ if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
+ vubr_die("bind");
+ }
+
+ if (listen(dev->sock, 1) == -1) {
+ vubr_die("listen");
+ }
+ cb = vubr_accept_cb;
- if (listen(dev->sock, 1) == -1) {
- vubr_die("listen");
+ DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
+ } else {
+ if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
+ vubr_die("connect");
+ }
+ cb = vubr_receive_cb;
}
dispatcher_init(&dev->dispatcher);
- dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev,
- vubr_accept_cb);
+ dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
- DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
return dev;
}
@@ -1368,8 +1385,9 @@ main(int argc, char *argv[])
{
VubrDev *dev;
int opt;
+ bool client = false;
- while ((opt = getopt(argc, argv, "l:r:u:")) != -1) {
+ while ((opt = getopt(argc, argv, "l:r:u:c")) != -1) {
switch (opt) {
case 'l':
@@ -1385,16 +1403,20 @@ main(int argc, char *argv[])
case 'u':
ud_socket_path = strdup(optarg);
break;
+ case 'c':
+ client = true;
+ break;
default:
goto out;
}
}
- DPRINT("ud socket: %s\n", ud_socket_path);
+ DPRINT("ud socket: %s (%s)\n", ud_socket_path,
+ client ? "client" : "server");
DPRINT("local: %s:%s\n", lhost, lport);
DPRINT("remote: %s:%s\n", rhost, rport);
- dev = vubr_new(ud_socket_path);
+ dev = vubr_new(ud_socket_path, client);
if (!dev) {
return 1;
}
@@ -1405,13 +1427,14 @@ main(int argc, char *argv[])
out:
fprintf(stderr, "Usage: %s ", argv[0]);
- fprintf(stderr, "[-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
+ fprintf(stderr, "[-c] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
fprintf(stderr, "\t-u path to unix doman socket. default: %s\n",
DEFAULT_UD_SOCKET);
fprintf(stderr, "\t-l local host and port. default: %s:%s\n",
DEFAULT_LHOST, DEFAULT_LPORT);
fprintf(stderr, "\t-r remote host and port. default: %s:%s\n",
DEFAULT_RHOST, DEFAULT_RPORT);
+ fprintf(stderr, "\t-c client mode\n");
return 1;
}
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index b2b1665ab8..8b2164b99d 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -32,7 +32,7 @@
#define QEMU_CMD_ACCEL " -machine accel=tcg"
#define QEMU_CMD_MEM " -m %d -object memory-backend-file,id=mem,size=%dM,"\
"mem-path=%s,share=on -numa node,memdev=mem"
-#define QEMU_CMD_CHR " -chardev socket,id=%s,path=%s"
+#define QEMU_CMD_CHR " -chardev socket,id=%s,path=%s%s"
#define QEMU_CMD_NETDEV " -netdev vhost-user,id=net0,chardev=%s,vhostforce"
#define QEMU_CMD_NET " -device virtio-net-pci,netdev=net0,romfile=./pc-bios/pxe-virtio.rom"
@@ -244,7 +244,12 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
if (msg.size) {
p += VHOST_USER_HDR_SIZE;
- g_assert_cmpint(qemu_chr_fe_read_all(chr, p, msg.size), ==, msg.size);
+ size = qemu_chr_fe_read_all(chr, p, msg.size);
+ if (size != msg.size) {
+ g_test_message("Wrong message size received %d != %d\n",
+ size, msg.size);
+ return;
+ }
}
switch (msg.request) {
@@ -361,17 +366,10 @@ static const char *init_hugepagefs(const char *path)
static TestServer *test_server_new(const gchar *name)
{
TestServer *server = g_new0(TestServer, 1);
- gchar *chr_path;
server->socket_path = g_strdup_printf("%s/%s.sock", tmpfs, name);
server->mig_path = g_strdup_printf("%s/%s.mig", tmpfs, name);
-
- chr_path = g_strdup_printf("unix:%s,server,nowait", server->socket_path);
server->chr_name = g_strdup_printf("chr-%s", name);
- server->chr = qemu_chr_new(server->chr_name, chr_path, NULL);
- g_free(chr_path);
-
- qemu_chr_add_handlers(server->chr, chr_can_read, chr_read, NULL, server);
g_mutex_init(&server->data_mutex);
g_cond_init(&server->data_cond);
@@ -381,13 +379,34 @@ static TestServer *test_server_new(const gchar *name)
return server;
}
-#define GET_QEMU_CMD(s) \
- g_strdup_printf(QEMU_CMD, 512, 512, (root), (s)->chr_name, \
- (s)->socket_path, (s)->chr_name)
+static void test_server_create_chr(TestServer *server, const gchar *opt)
+{
+ gchar *chr_path;
-#define GET_QEMU_CMDE(s, mem, extra, ...) \
- g_strdup_printf(QEMU_CMD extra, (mem), (mem), (root), (s)->chr_name, \
- (s)->socket_path, (s)->chr_name, ##__VA_ARGS__)
+ chr_path = g_strdup_printf("unix:%s%s", server->socket_path, opt);
+ server->chr = qemu_chr_new(server->chr_name, chr_path, NULL);
+ g_free(chr_path);
+
+ qemu_chr_add_handlers(server->chr, chr_can_read, chr_read, NULL, server);
+}
+
+static void test_server_listen(TestServer *server)
+{
+ test_server_create_chr(server, ",server,nowait");
+}
+
+static inline void test_server_connect(TestServer *server)
+{
+ test_server_create_chr(server, ",reconnect=1");
+}
+
+#define GET_QEMU_CMD(s) \
+ g_strdup_printf(QEMU_CMD, 512, 512, (root), (s)->chr_name, \
+ (s)->socket_path, "", (s)->chr_name)
+
+#define GET_QEMU_CMDE(s, mem, chr_opts, extra, ...) \
+ g_strdup_printf(QEMU_CMD extra, (mem), (mem), (root), (s)->chr_name, \
+ (s)->socket_path, (chr_opts), (s)->chr_name, ##__VA_ARGS__)
static gboolean _test_server_free(TestServer *server)
{
@@ -535,7 +554,10 @@ static void test_migrate(void)
guint8 *log;
guint64 size;
- cmd = GET_QEMU_CMDE(s, 2, "");
+ test_server_listen(s);
+ test_server_listen(dest);
+
+ cmd = GET_QEMU_CMDE(s, 2, "", "");
from = qtest_start(cmd);
g_free(cmd);
@@ -543,7 +565,7 @@ static void test_migrate(void)
size = get_log_size(s);
g_assert_cmpint(size, ==, (2 * 1024 * 1024) / (VHOST_LOG_PAGE * 8));
- cmd = GET_QEMU_CMDE(dest, 2, " -incoming %s", uri);
+ cmd = GET_QEMU_CMDE(dest, 2, "", " -incoming %s", uri);
to = qtest_init(cmd);
g_free(cmd);
@@ -603,6 +625,80 @@ static void test_migrate(void)
global_qtest = global;
}
+#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
+static void wait_for_rings_started(TestServer *s, size_t count)
+{
+ gint64 end_time;
+
+ g_mutex_lock(&s->data_mutex);
+ end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
+ while (ctpop64(s->rings) != count) {
+ if (!g_cond_wait_until(&s->data_cond, &s->data_mutex, end_time)) {
+ /* timeout has passed */
+ g_assert_cmpint(ctpop64(s->rings), ==, count);
+ break;
+ }
+ }
+
+ g_mutex_unlock(&s->data_mutex);
+}
+
+static gboolean
+reconnect_cb(gpointer user_data)
+{
+ TestServer *s = user_data;
+
+ qemu_chr_disconnect(s->chr);
+
+ return FALSE;
+}
+
+static gpointer
+connect_thread(gpointer data)
+{
+ TestServer *s = data;
+
+ /* wait for qemu to start before first try, to avoid extra warnings */
+ g_usleep(G_USEC_PER_SEC);
+ test_server_connect(s);
+
+ return NULL;
+}
+
+static void test_reconnect_subprocess(void)
+{
+ TestServer *s = test_server_new("reconnect");
+ char *cmd;
+
+ g_thread_new("connect", connect_thread, s);
+ cmd = GET_QEMU_CMDE(s, 2, ",server", "");
+ qtest_start(cmd);
+ g_free(cmd);
+
+ wait_for_fds(s);
+ wait_for_rings_started(s, 2);
+
+ /* reconnect */
+ s->fds_num = 0;
+ s->rings = 0;
+ g_idle_add(reconnect_cb, s);
+ wait_for_fds(s);
+ wait_for_rings_started(s, 2);
+
+ qtest_end();
+ test_server_free(s);
+ return;
+}
+
+static void test_reconnect(void)
+{
+ gchar *path = g_strdup_printf("/%s/vhost-user/reconnect/subprocess",
+ qtest_get_arch());
+ g_test_trap_subprocess(path, 0, 0);
+ g_test_trap_assert_passed();
+}
+#endif
+
int main(int argc, char **argv)
{
QTestState *s = NULL;
@@ -634,6 +730,7 @@ int main(int argc, char **argv)
}
server = test_server_new("test");
+ test_server_listen(server);
loop = g_main_loop_new(NULL, FALSE);
/* run the main loop thread so the chardev may operate */
@@ -646,6 +743,11 @@ int main(int argc, char **argv)
qtest_add_data_func("/vhost-user/read-guest-mem", server, read_guest_mem);
qtest_add_func("/vhost-user/migrate", test_migrate);
+#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
+ qtest_add_func("/vhost-user/reconnect/subprocess",
+ test_reconnect_subprocess);
+ qtest_add_func("/vhost-user/reconnect", test_reconnect);
+#endif
ret = g_test_run();