Diffstat (limited to 'hw')
-rw-r--r--  hw/ide/ahci.c        9
-rw-r--r--  hw/ide/ahci.h        1
-rw-r--r--  hw/ide/atapi.c      29
-rw-r--r--  hw/ide/core.c       27
-rw-r--r--  hw/ide/internal.h    1
-rw-r--r--  hw/ide/macio.c      40
-rw-r--r--  hw/pci.c            38
-rw-r--r--  hw/pcie.c           12
-rw-r--r--  hw/pcie_aer.c        9
-rw-r--r--  hw/scsi-disk.c      17
-rw-r--r--  hw/sh_pci.c          2
-rw-r--r--  hw/vhost.c          74
-rw-r--r--  hw/vhost.h           2
-rw-r--r--  hw/vhost_net.c      16
-rw-r--r--  hw/virtio-blk.c     20
-rw-r--r--  hw/xen_disk.c        5
16 files changed, 252 insertions(+), 50 deletions(-)
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 29521babf7..f4fa1545bd 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -710,6 +710,7 @@ static void ncq_cb(void *opaque, int ret)
DPRINTF(ncq_tfs->drive->port_no, "NCQ transfer tag %d finished\n",
ncq_tfs->tag);
+ bdrv_acct_done(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->acct);
qemu_sglist_destroy(&ncq_tfs->sglist);
ncq_tfs->used = 0;
}
@@ -756,6 +757,10 @@ static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
ncq_tfs->is_read = 1;
DPRINTF(port, "tag %d aio read %ld\n", ncq_tfs->tag, ncq_tfs->lba);
+
+ bdrv_acct_start(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->acct,
+ (ncq_tfs->sector_count-1) * BDRV_SECTOR_SIZE,
+ BDRV_ACCT_READ);
ncq_tfs->aiocb = dma_bdrv_read(ncq_tfs->drive->port.ifs[0].bs,
&ncq_tfs->sglist, ncq_tfs->lba,
ncq_cb, ncq_tfs);
@@ -766,6 +771,10 @@ static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis,
ncq_tfs->is_read = 0;
DPRINTF(port, "tag %d aio write %ld\n", ncq_tfs->tag, ncq_tfs->lba);
+
+ bdrv_acct_start(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->acct,
+ (ncq_tfs->sector_count-1) * BDRV_SECTOR_SIZE,
+ BDRV_ACCT_WRITE);
ncq_tfs->aiocb = dma_bdrv_write(ncq_tfs->drive->port.ifs[0].bs,
&ncq_tfs->sglist, ncq_tfs->lba,
ncq_cb, ncq_tfs);
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index e456193b2b..832539c23c 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -258,6 +258,7 @@ typedef struct NCQTransferState {
AHCIDevice *drive;
BlockDriverAIOCB *aiocb;
QEMUSGList sglist;
+ BlockAcctCookie acct;
int is_read;
uint16_t sector_count;
uint64_t lba;
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index fe2fb0b806..c552320122 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -104,17 +104,20 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
memset(buf, 0, 288);
}
-static int cd_read_sector(BlockDriverState *bs, int lba, uint8_t *buf,
- int sector_size)
+static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size)
{
int ret;
switch(sector_size) {
case 2048:
- ret = bdrv_read(bs, (int64_t)lba << 2, buf, 4);
+ bdrv_acct_start(s->bs, &s->acct, 4 * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
+ ret = bdrv_read(s->bs, (int64_t)lba << 2, buf, 4);
+ bdrv_acct_done(s->bs, &s->acct);
break;
case 2352:
- ret = bdrv_read(bs, (int64_t)lba << 2, buf + 16, 4);
+ bdrv_acct_start(s->bs, &s->acct, 4 * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
+ ret = bdrv_read(s->bs, (int64_t)lba << 2, buf + 16, 4);
+ bdrv_acct_done(s->bs, &s->acct);
if (ret < 0)
return ret;
cd_data_to_raw(buf, lba);
@@ -181,7 +184,7 @@ void ide_atapi_cmd_reply_end(IDEState *s)
} else {
/* see if a new sector must be read */
if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
- ret = cd_read_sector(s->bs, s->lba, s->io_buffer, s->cd_sector_size);
+ ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
if (ret < 0) {
ide_transfer_stop(s);
ide_atapi_io_error(s, ret);
@@ -250,6 +253,7 @@ static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size)
s->io_buffer_index = 0;
if (s->atapi_dma) {
+ bdrv_acct_start(s->bs, &s->acct, size, BDRV_ACCT_READ);
s->status = READY_STAT | SEEK_STAT | DRQ_STAT;
s->bus->dma->ops->start_dma(s->bus->dma, s,
ide_atapi_cmd_read_dma_cb);
@@ -322,10 +326,7 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
s->status = READY_STAT | SEEK_STAT;
s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
ide_set_irq(s->bus);
- eot:
- s->bus->dma->ops->add_status(s->bus->dma, BM_STATUS_INT);
- ide_set_inactive(s);
- return;
+ goto eot;
}
s->io_buffer_index = 0;
@@ -343,9 +344,11 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
#ifdef DEBUG_AIO
printf("aio_read_cd: lba=%u n=%d\n", s->lba, n);
#endif
+
s->bus->dma->iov.iov_base = (void *)(s->io_buffer + data_offset);
s->bus->dma->iov.iov_len = n * 4 * 512;
qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1);
+
s->bus->dma->aiocb = bdrv_aio_readv(s->bs, (int64_t)s->lba << 2,
&s->bus->dma->qiov, n * 4,
ide_atapi_cmd_read_dma_cb, s);
@@ -355,6 +358,12 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
ASC_MEDIUM_NOT_PRESENT);
goto eot;
}
+
+ return;
+eot:
+ bdrv_acct_done(s->bs, &s->acct);
+ s->bus->dma->ops->add_status(s->bus->dma, BM_STATUS_INT);
+ ide_set_inactive(s);
}
/* start a CD-CDROM read command with DMA */
@@ -368,6 +377,8 @@ static void ide_atapi_cmd_read_dma(IDEState *s, int lba, int nb_sectors,
s->io_buffer_size = 0;
s->cd_sector_size = sector_size;
+ bdrv_acct_start(s->bs, &s->acct, s->packet_transfer_size, BDRV_ACCT_READ);
+
/* XXX: check if BUSY_STAT should be set */
s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT;
s->bus->dma->ops->start_dma(s->bus->dma, s,
diff --git a/hw/ide/core.c b/hw/ide/core.c
index d145b19b0c..40abc1edd2 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -473,7 +473,10 @@ void ide_sector_read(IDEState *s)
#endif
if (n > s->req_nb_sectors)
n = s->req_nb_sectors;
+
+ bdrv_acct_start(s->bs, &s->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
ret = bdrv_read(s->bs, sector_num, s->io_buffer, n);
+ bdrv_acct_done(s->bs, &s->acct);
if (ret != 0) {
if (ide_handle_rw_error(s, -ret,
BM_STATUS_PIO_RETRY | BM_STATUS_RETRY_READ))
@@ -610,7 +613,10 @@ handle_rw_error:
return;
eot:
- ide_set_inactive(s);
+ if (s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) {
+ bdrv_acct_done(s->bs, &s->acct);
+ }
+ ide_set_inactive(s);
}
static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd)
@@ -619,6 +625,20 @@ static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd)
s->io_buffer_index = 0;
s->io_buffer_size = 0;
s->dma_cmd = dma_cmd;
+
+ switch (dma_cmd) {
+ case IDE_DMA_READ:
+ bdrv_acct_start(s->bs, &s->acct, s->nsector * BDRV_SECTOR_SIZE,
+ BDRV_ACCT_READ);
+ break;
+ case IDE_DMA_WRITE:
+ bdrv_acct_start(s->bs, &s->acct, s->nsector * BDRV_SECTOR_SIZE,
+ BDRV_ACCT_WRITE);
+ break;
+ default:
+ break;
+ }
+
s->bus->dma->ops->start_dma(s->bus->dma, s, ide_dma_cb);
}
@@ -641,7 +661,10 @@ void ide_sector_write(IDEState *s)
n = s->nsector;
if (n > s->req_nb_sectors)
n = s->req_nb_sectors;
+
+ bdrv_acct_start(s->bs, &s->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_WRITE);
ret = bdrv_write(s->bs, sector_num, s->io_buffer, n);
+ bdrv_acct_done(s->bs, &s->acct);
if (ret != 0) {
if (ide_handle_rw_error(s, -ret, BM_STATUS_PIO_RETRY))
@@ -685,6 +708,7 @@ static void ide_flush_cb(void *opaque, int ret)
}
}
+ bdrv_acct_done(s->bs, &s->acct);
s->status = READY_STAT | SEEK_STAT;
ide_set_irq(s->bus);
}
@@ -698,6 +722,7 @@ void ide_flush_cache(IDEState *s)
return;
}
+ bdrv_acct_start(s->bs, &s->acct, 0, BDRV_ACCT_FLUSH);
acb = bdrv_aio_flush(s->bs, ide_flush_cb, s);
if (acb == NULL) {
ide_flush_cb(s, -EIO);
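The IDE hunks above all follow the same accounting pattern introduced by this series: a BlockAcctCookie is stored per request, bdrv_acct_start() is called with the byte count and request type before the I/O is issued, and bdrv_acct_done() is called once it completes, including on error. A minimal sketch of that pattern, assuming only the bdrv_acct_* helpers and BDRV_ACCT_* constants visible in this diff; the helper name is illustrative:

    /* Sketch: bracket a synchronous read with accounting, as ide_sector_read()
     * does above. Relies on the bdrv_acct_* API shown in this diff. */
    static int read_with_accounting(BlockDriverState *bs, BlockAcctCookie *cookie,
                                    int64_t sector_num, uint8_t *buf, int nb_sectors)
    {
        int ret;

        /* record start time and request size before issuing the read */
        bdrv_acct_start(bs, cookie, nb_sectors * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
        ret = bdrv_read(bs, sector_num, buf, nb_sectors);
        /* close the accounting record even if the read failed */
        bdrv_acct_done(bs, cookie);
        return ret;
    }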
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index 02e805f070..7f5ef8de1d 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -440,6 +440,7 @@ struct IDEState {
int lba;
int cd_sector_size;
int atapi_dma; /* true if dma is requested for the packet cmd */
+ BlockAcctCookie acct;
/* ATA DMA state */
int io_buffer_size;
QEMUSGList sg;
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 44fb3fef60..fdf5d75082 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -52,8 +52,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
m->aiocb = NULL;
qemu_sglist_destroy(&s->sg);
ide_atapi_io_error(s, ret);
- io->dma_end(opaque);
- return;
+ goto done;
}
if (s->io_buffer_size > 0) {
@@ -71,8 +70,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
ide_atapi_cmd_ok(s);
if (io->len == 0) {
- io->dma_end(opaque);
- return;
+ goto done;
}
/* launch next transfer */
@@ -92,9 +90,14 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
/* Note: media not present is the most likely case */
ide_atapi_cmd_error(s, SENSE_NOT_READY,
ASC_MEDIUM_NOT_PRESENT);
- io->dma_end(opaque);
- return;
+ goto done;
}
+ return;
+
+done:
+ bdrv_acct_done(s->bs, &s->acct);
+ io->dma_end(opaque);
+ return;
}
static void pmac_ide_transfer_cb(void *opaque, int ret)
@@ -109,8 +112,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
m->aiocb = NULL;
qemu_sglist_destroy(&s->sg);
ide_dma_error(s);
- io->dma_end(io);
- return;
+ goto done;
}
sector_num = ide_get_sector(s);
@@ -130,10 +132,8 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
}
/* end of DMA ? */
-
if (io->len == 0) {
- io->dma_end(io);
- return;
+ goto done;
}
/* launch next transfer */
@@ -163,6 +163,12 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
if (!m->aiocb)
pmac_ide_transfer_cb(io, -1);
+ return;
+done:
+ if (s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) {
+ bdrv_acct_done(s->bs, &s->acct);
+ }
+ io->dma_end(io);
}
static void pmac_ide_transfer(DBDMA_io *io)
@@ -172,10 +178,22 @@ static void pmac_ide_transfer(DBDMA_io *io)
s->io_buffer_size = 0;
if (s->drive_kind == IDE_CD) {
+ bdrv_acct_start(s->bs, &s->acct, io->len, BDRV_ACCT_READ);
pmac_ide_atapi_transfer_cb(io, 0);
return;
}
+ switch (s->dma_cmd) {
+ case IDE_DMA_READ:
+ bdrv_acct_start(s->bs, &s->acct, io->len, BDRV_ACCT_READ);
+ break;
+ case IDE_DMA_WRITE:
+ bdrv_acct_start(s->bs, &s->acct, io->len, BDRV_ACCT_WRITE);
+ break;
+ default:
+ break;
+ }
+
pmac_ide_transfer_cb(io, 0);
}
diff --git a/hw/pci.c b/hw/pci.c
index 6124790f01..57ff7b1098 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1811,6 +1811,25 @@ static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
return next;
}
+static uint8_t pci_find_capability_at_offset(PCIDevice *pdev, uint8_t offset)
+{
+ uint8_t next, prev, found = 0;
+
+ if (!(pdev->used[offset])) {
+ return 0;
+ }
+
+ assert(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST);
+
+ for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
+ prev = next + PCI_CAP_LIST_NEXT) {
+ if (next <= offset && next > found) {
+ found = next;
+ }
+ }
+ return found;
+}
+
/* Patch the PCI vendor and device ids in a PCI rom image if necessary.
This is needed for an option rom which is used for more than one device. */
static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, int size)
@@ -1952,11 +1971,30 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
uint8_t offset, uint8_t size)
{
uint8_t *config;
+ int i, overlapping_cap;
+
if (!offset) {
offset = pci_find_space(pdev, size);
if (!offset) {
return -ENOSPC;
}
+ } else {
+ /* Verify that capabilities don't overlap. Note: device assignment
+ * depends on this check to verify that the device is not broken.
+ * Should never trigger for emulated devices, but it's helpful
+ * for debugging these. */
+ for (i = offset; i < offset + size; i++) {
+ overlapping_cap = pci_find_capability_at_offset(pdev, i);
+ if (overlapping_cap) {
+ fprintf(stderr, "ERROR: %04x:%02x:%02x.%x "
+ "Attempt to add PCI capability %x at offset "
+ "%x overlaps existing capability %x at offset %x\n",
+ pci_find_domain(pdev->bus), pci_bus_num(pdev->bus),
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+ cap_id, offset, overlapping_cap, i);
+ return -EINVAL;
+ }
+ }
}
config = pdev->config + offset;
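The new pci_find_capability_at_offset() walks the capability list rooted at PCI_CAPABILITY_LIST, and pci_add_capability() now rejects a caller-supplied offset if any byte of the new capability lands inside an existing one. A simplified, self-contained illustration of that overlap test, assuming a bare config[] array and a per-capability length table in place of PCIDevice and pdev->used[]:

    #include <stdbool.h>
    #include <stdint.h>

    #define PCI_CAPABILITY_LIST 0x34
    #define PCI_CAP_LIST_NEXT   1

    /* Illustration only: returns true if [offset, offset + size) overlaps any
     * capability reachable from config[PCI_CAPABILITY_LIST]. cap_len[] stands
     * in for the per-byte pdev->used[] bookkeeping used by the real code. */
    static bool cap_range_overlaps(const uint8_t *config, const uint8_t *cap_len,
                                   uint8_t offset, uint8_t size)
    {
        uint8_t next;

        for (next = config[PCI_CAPABILITY_LIST]; next;
             next = config[next + PCI_CAP_LIST_NEXT]) {
            if (offset < next + cap_len[next] && next < offset + size) {
                return true;
            }
        }
        return false;
    }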
diff --git a/hw/pcie.c b/hw/pcie.c
index 39607bf31a..5c9eb2f0ac 100644
--- a/hw/pcie.c
+++ b/hw/pcie.c
@@ -175,6 +175,14 @@ static void hotplug_event_notify(PCIDevice *dev)
}
}
+static void hotplug_event_clear(PCIDevice *dev)
+{
+ hotplug_event_update_event_status(dev);
+ if (!msix_enabled(dev) && !msi_enabled(dev) && !dev->exp.hpev_notified) {
+ qemu_set_irq(dev->irq[dev->exp.hpev_intx], 0);
+ }
+}
+
/*
* A PCI Express Hot-Plug Event has occurred, so update slot status register
* and notify OS of the event if necessary.
@@ -320,6 +328,10 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
uint8_t *exp_cap = dev->config + pos;
uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
+ if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
+ hotplug_event_clear(dev);
+ }
+
if (!ranges_overlap(addr, len, pos + PCI_EXP_SLTCTL, 2)) {
return;
}
diff --git a/hw/pcie_aer.c b/hw/pcie_aer.c
index 2ae65ec807..62c06eafd6 100644
--- a/hw/pcie_aer.c
+++ b/hw/pcie_aer.c
@@ -415,7 +415,7 @@ static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
int i;
assert(err->status);
- assert(err->status & (err->status - 1));
+ assert(!(err->status & (err->status - 1)));
errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
errcap |= PCI_ERR_CAP_FEP(first_bit);
@@ -495,7 +495,7 @@ static int pcie_aer_record_error(PCIDevice *dev,
int fep = PCI_ERR_CAP_FEP(errcap);
assert(err->status);
- assert(err->status & (err->status - 1));
+ assert(!(err->status & (err->status - 1)));
if (errcap & PCI_ERR_CAP_MHRE &&
(pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
@@ -979,20 +979,21 @@ int do_pcie_aer_inejct_error(Monitor *mon,
if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
char *e = NULL;
error_status = strtoul(error_name, &e, 0);
- correctable = !!qdict_get_int(qdict, "correctable");
+ correctable = qdict_get_try_bool(qdict, "correctable", 0);
if (!e || *e != '\0') {
monitor_printf(mon, "invalid error status value. \"%s\"",
error_name);
return -EINVAL;
}
}
+ err.status = error_status;
err.source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn;
err.flags = 0;
if (correctable) {
err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
}
- if (qdict_get_int(qdict, "advisory_non_fatal")) {
+ if (qdict_get_try_bool(qdict, "advisory_non_fatal", 0)) {
err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
}
if (qdict_haskey(qdict, "header0")) {
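The two assertion fixes above invert a botched single-bit test: for a non-zero status, status & (status - 1) clears the lowest set bit, so the expression is zero exactly when one bit is set. Together with the preceding assert(err->status), the corrected asserts require exactly one error bit. A small standalone check of the identity:

    #include <assert.h>
    #include <stdint.h>

    /* x & (x - 1) clears the lowest set bit, so a non-zero x passes
     * exactly when a single bit is set. */
    static int has_single_bit(uint32_t x)
    {
        return x != 0 && (x & (x - 1)) == 0;
    }

    int main(void)
    {
        assert(has_single_bit(0x40));    /* one bit set */
        assert(!has_single_bit(0x41));   /* two bits set */
        assert(!has_single_bit(0));      /* no bits set */
        return 0;
    }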
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index d94b1eb53c..3cc830ff95 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -57,6 +57,7 @@ typedef struct SCSIDiskReq {
struct iovec iov;
QEMUIOVector qiov;
uint32_t status;
+ BlockAcctCookie acct;
} SCSIDiskReq;
struct SCSIDiskState
@@ -107,10 +108,13 @@ static void scsi_cancel_io(SCSIRequest *req)
static void scsi_read_complete(void * opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
int n;
r->req.aiocb = NULL;
+ bdrv_acct_done(s->bs, &r->acct);
+
if (ret) {
if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_READ)) {
return;
@@ -161,6 +165,8 @@ static void scsi_read_data(SCSIRequest *req)
r->iov.iov_len = n * 512;
qemu_iovec_init_external(&r->qiov, &r->iov, 1);
+
+ bdrv_acct_start(s->bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
r->req.aiocb = bdrv_aio_readv(s->bs, r->sector, &r->qiov, n,
scsi_read_complete, r);
if (r->req.aiocb == NULL) {
@@ -207,11 +213,14 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type)
static void scsi_write_complete(void * opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
uint32_t len;
uint32_t n;
r->req.aiocb = NULL;
+ bdrv_acct_done(s->bs, &r->acct);
+
if (ret) {
if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_WRITE)) {
return;
@@ -252,6 +261,8 @@ static void scsi_write_data(SCSIRequest *req)
n = r->iov.iov_len / 512;
if (n) {
qemu_iovec_init_external(&r->qiov, &r->iov, 1);
+
+ bdrv_acct_start(s->bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_WRITE);
r->req.aiocb = bdrv_aio_writev(s->bs, r->sector, &r->qiov, n,
scsi_write_complete, r);
if (r->req.aiocb == NULL) {
@@ -854,13 +865,19 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
buflen = 8;
break;
case SYNCHRONIZE_CACHE:
+ {
+ BlockAcctCookie acct;
+
+ bdrv_acct_start(s->bs, &acct, 0, BDRV_ACCT_FLUSH);
ret = bdrv_flush(s->bs);
+ bdrv_acct_done(s->bs, &acct);
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_FLUSH)) {
return -1;
}
}
break;
+ }
case GET_CONFIGURATION:
memset(outbuf, 0, 8);
/* ??? This should probably return much more information. For now
diff --git a/hw/sh_pci.c b/hw/sh_pci.c
index 76061bb756..36f39300d5 100644
--- a/hw/sh_pci.c
+++ b/hw/sh_pci.c
@@ -150,7 +150,7 @@ static int sh_pci_init_device(SysBusDevice *dev)
PCI_DEVFN(0, 0), 4);
memory_region_init_io(&s->memconfig_p4, &sh_pci_reg_ops, s,
"sh_pci", 0x224);
- memory_region_init_alias(&s->memconfig_a7, "sh_pci.2", &s->memconfig_a7,
+ memory_region_init_alias(&s->memconfig_a7, "sh_pci.2", &s->memconfig_p4,
0, 0x224);
isa_mmio_setup(&s->isa, 0x40000);
sysbus_init_mmio_cb2(dev, sh_pci_map, sh_pci_unmap);
diff --git a/hw/vhost.c b/hw/vhost.c
index 18860678ba..0870cb7d85 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -515,11 +515,6 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
};
struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
- if (!vdev->binding->set_host_notifier) {
- fprintf(stderr, "binding does not support host notifiers\n");
- return -ENOSYS;
- }
-
vq->num = state.num = virtio_queue_get_num(vdev, idx);
r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
if (r) {
@@ -567,12 +562,6 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
r = -errno;
goto fail_alloc;
}
- r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true);
- if (r < 0) {
- fprintf(stderr, "Error binding host notifier: %d\n", -r);
- goto fail_host_notifier;
- }
-
file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
if (r) {
@@ -591,8 +580,6 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
fail_call:
fail_kick:
- vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
-fail_host_notifier:
fail_alloc:
cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
0, 0);
@@ -618,12 +605,6 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
.index = idx,
};
int r;
- r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
- if (r < 0) {
- fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r);
- fflush(stderr);
- }
- assert (r >= 0);
r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
if (r < 0) {
fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
@@ -697,6 +678,60 @@ bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
hdev->force;
}
+/* Stop processing guest IO notifications in qemu.
+ * Start processing them in vhost in kernel.
+ */
+int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i, r;
+ if (!vdev->binding->set_host_notifier) {
+ fprintf(stderr, "binding does not support host notifiers\n");
+ r = -ENOSYS;
+ goto fail;
+ }
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
+ goto fail_vq;
+ }
+ }
+
+ return 0;
+fail_vq:
+ while (--i >= 0) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
+ fflush(stderr);
+ }
+ assert (r >= 0);
+ }
+fail:
+ return r;
+}
+
+/* Stop processing guest IO notifications in vhost.
+ * Start processing them in qemu.
+ * This might actually run the qemu handlers right away,
+ * so virtio in qemu must be completely set up when this is called.
+ */
+void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i, r;
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
+ fflush(stderr);
+ }
+ assert (r >= 0);
+ }
+}
+
+/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
int i, r;
@@ -762,6 +797,7 @@ fail:
return r;
}
+/* Host notifiers must be enabled at this point. */
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
{
int i, r;
diff --git a/hw/vhost.h b/hw/vhost.h
index c8c595a147..c9452f0732 100644
--- a/hw/vhost.h
+++ b/hw/vhost.h
@@ -46,5 +46,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev);
bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev);
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
+int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
+void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
#endif
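With the notifier setup pulled out of vhost_virtqueue_init()/cleanup(), callers are expected to bracket vhost_dev_start()/vhost_dev_stop() with the two new helpers, which is what the vhost_net.c change below does. A sketch of the intended call order, using hypothetical example_* wrappers around the API declared above:

    /* Host notifiers stay enabled for the whole time vhost owns the queues. */
    static int example_start(struct vhost_dev *hdev, VirtIODevice *vdev)
    {
        int r;

        r = vhost_dev_enable_notifiers(hdev, vdev);  /* qemu stops handling kicks */
        if (r < 0) {
            return r;
        }
        r = vhost_dev_start(hdev, vdev);             /* kernel vhost takes over */
        if (r < 0) {
            vhost_dev_disable_notifiers(hdev, vdev);
        }
        return r;
    }

    static void example_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
    {
        vhost_dev_stop(hdev, vdev);                  /* stop vhost first */
        vhost_dev_disable_notifiers(hdev, vdev);     /* then hand kicks back to qemu */
    }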
diff --git a/hw/vhost_net.c b/hw/vhost_net.c
index a55981200d..950a6b8d99 100644
--- a/hw/vhost_net.c
+++ b/hw/vhost_net.c
@@ -139,16 +139,22 @@ int vhost_net_start(struct vhost_net *net,
{
struct vhost_vring_file file = { };
int r;
+
+ net->dev.nvqs = 2;
+ net->dev.vqs = net->vqs;
+
+ r = vhost_dev_enable_notifiers(&net->dev, dev);
+ if (r < 0) {
+ goto fail_notifiers;
+ }
if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
tap_set_vnet_hdr_len(net->vc,
sizeof(struct virtio_net_hdr_mrg_rxbuf));
}
- net->dev.nvqs = 2;
- net->dev.vqs = net->vqs;
r = vhost_dev_start(&net->dev, dev);
if (r < 0) {
- return r;
+ goto fail_start;
}
net->vc->info->poll(net->vc, false);
@@ -173,6 +179,9 @@ fail:
if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
}
+fail_start:
+ vhost_dev_disable_notifiers(&net->dev, dev);
+fail_notifiers:
return r;
}
@@ -190,6 +199,7 @@ void vhost_net_stop(struct vhost_net *net,
if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
}
+ vhost_dev_disable_notifiers(&net->dev, dev);
}
void vhost_net_cleanup(struct vhost_net *net)
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index dad8c0a6a2..2a8ccd0aa9 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -47,6 +47,7 @@ typedef struct VirtIOBlockReq
struct virtio_scsi_inhdr *scsi;
QEMUIOVector qiov;
struct VirtIOBlockReq *next;
+ BlockAcctCookie acct;
} VirtIOBlockReq;
static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
@@ -58,8 +59,6 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
stb_p(&req->in->status, status);
virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
virtio_notify(&s->vdev, s->vq);
-
- g_free(req);
}
static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
@@ -81,6 +80,8 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
vm_stop(VMSTOP_DISKFULL);
} else {
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+ bdrv_acct_done(s->bs, &req->acct);
+ g_free(req);
bdrv_mon_event(s->bs, BDRV_ACTION_REPORT, is_read);
}
@@ -100,6 +101,8 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
}
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ bdrv_acct_done(req->dev->bs, &req->acct);
+ g_free(req);
}
static void virtio_blk_flush_complete(void *opaque, int ret)
@@ -113,6 +116,8 @@ static void virtio_blk_flush_complete(void *opaque, int ret)
}
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ bdrv_acct_done(req->dev->bs, &req->acct);
+ g_free(req);
}
static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
@@ -155,6 +160,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
*/
if (req->elem.out_num < 2 || req->elem.in_num < 3) {
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+ g_free(req);
return;
}
@@ -163,6 +169,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
*/
if (req->elem.out_num > 2 && req->elem.in_num > 3) {
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
+ g_free(req);
return;
}
@@ -229,11 +236,13 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
stl_p(&req->scsi->data_len, hdr.dxfer_len);
virtio_blk_req_complete(req, status);
+ g_free(req);
}
#else
static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
+ g_free(req);
}
#endif /* __linux__ */
@@ -266,6 +275,8 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
BlockDriverAIOCB *acb;
+ bdrv_acct_start(req->dev->bs, &req->acct, 0, BDRV_ACCT_FLUSH);
+
/*
* Make sure all outstanding writes are posted to the backing device.
*/
@@ -284,6 +295,8 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
sector = ldq_p(&req->out->sector);
+ bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
+
trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
if (sector & req->dev->sector_mask) {
@@ -317,6 +330,8 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
sector = ldq_p(&req->out->sector);
+ bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
+
if (sector & req->dev->sector_mask) {
virtio_blk_rw_complete(req, -EIO);
return;
@@ -370,6 +385,7 @@ static void virtio_blk_handle_request(VirtIOBlockReq *req,
s->serial ? s->serial : "",
MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ g_free(req);
} else if (type & VIRTIO_BLK_T_OUT) {
qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
req->elem.out_num - 1);
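Since virtio_blk_req_complete() no longer frees the request, every completion path in this file must free it explicitly, and the I/O paths must finalize accounting first, while req->acct is still valid. A sketch of that ordering, with example_rw_complete as a hypothetical stand-in for the callbacks above:

    /* Completion ordering after this change: push the used element, close the
     * accounting record, then free the request (virtio_blk_req_complete() no
     * longer does the free). */
    static void example_rw_complete(VirtIOBlockReq *req, int status)
    {
        virtio_blk_req_complete(req, status);
        bdrv_acct_done(req->dev->bs, &req->acct);
        g_free(req);
    }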
diff --git a/hw/xen_disk.c b/hw/xen_disk.c
index 31f91514f2..bd5c66916b 100644
--- a/hw/xen_disk.c
+++ b/hw/xen_disk.c
@@ -79,6 +79,7 @@ struct ioreq {
struct XenBlkDev *blkdev;
QLIST_ENTRY(ioreq) list;
+ BlockAcctCookie acct;
};
struct XenBlkDev {
@@ -401,6 +402,7 @@ static void qemu_aio_complete(void *opaque, int ret)
ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
ioreq_unmap(ioreq);
ioreq_finish(ioreq);
+ bdrv_acct_done(ioreq->blkdev->bs, &ioreq->acct);
qemu_bh_schedule(ioreq->blkdev->bh);
}
@@ -419,6 +421,7 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
switch (ioreq->req.operation) {
case BLKIF_OP_READ:
+ bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_READ);
ioreq->aio_inflight++;
bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
&ioreq->v, ioreq->v.size / BLOCK_SIZE,
@@ -429,6 +432,8 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
if (!ioreq->req.nr_segments) {
break;
}
+
+ bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_WRITE);
ioreq->aio_inflight++;
bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
&ioreq->v, ioreq->v.size / BLOCK_SIZE,