diff options
Diffstat (limited to 'hw')
-rw-r--r-- | hw/ide/ahci.c | 9 | ||||
-rw-r--r-- | hw/ide/ahci.h | 1 | ||||
-rw-r--r-- | hw/ide/atapi.c | 29 | ||||
-rw-r--r-- | hw/ide/core.c | 27 | ||||
-rw-r--r-- | hw/ide/internal.h | 1 | ||||
-rw-r--r-- | hw/ide/macio.c | 40 | ||||
-rw-r--r-- | hw/pci.c | 38 | ||||
-rw-r--r-- | hw/pcie.c | 12 | ||||
-rw-r--r-- | hw/pcie_aer.c | 9 | ||||
-rw-r--r-- | hw/scsi-disk.c | 17 | ||||
-rw-r--r-- | hw/sh_pci.c | 2 | ||||
-rw-r--r-- | hw/vhost.c | 74 | ||||
-rw-r--r-- | hw/vhost.h | 2 | ||||
-rw-r--r-- | hw/vhost_net.c | 16 | ||||
-rw-r--r-- | hw/virtio-blk.c | 20 | ||||
-rw-r--r-- | hw/xen_disk.c | 5 |
16 files changed, 252 insertions, 50 deletions
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 29521babf7..f4fa1545bd 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -710,6 +710,7 @@ static void ncq_cb(void *opaque, int ret) DPRINTF(ncq_tfs->drive->port_no, "NCQ transfer tag %d finished\n", ncq_tfs->tag); + bdrv_acct_done(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->acct); qemu_sglist_destroy(&ncq_tfs->sglist); ncq_tfs->used = 0; } @@ -756,6 +757,10 @@ static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis, ncq_tfs->is_read = 1; DPRINTF(port, "tag %d aio read %ld\n", ncq_tfs->tag, ncq_tfs->lba); + + bdrv_acct_start(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->acct, + (ncq_tfs->sector_count-1) * BDRV_SECTOR_SIZE, + BDRV_ACCT_READ); ncq_tfs->aiocb = dma_bdrv_read(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist, ncq_tfs->lba, ncq_cb, ncq_tfs); @@ -766,6 +771,10 @@ static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis, ncq_tfs->is_read = 0; DPRINTF(port, "tag %d aio write %ld\n", ncq_tfs->tag, ncq_tfs->lba); + + bdrv_acct_start(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->acct, + (ncq_tfs->sector_count-1) * BDRV_SECTOR_SIZE, + BDRV_ACCT_WRITE); ncq_tfs->aiocb = dma_bdrv_write(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist, ncq_tfs->lba, ncq_cb, ncq_tfs); diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h index e456193b2b..832539c23c 100644 --- a/hw/ide/ahci.h +++ b/hw/ide/ahci.h @@ -258,6 +258,7 @@ typedef struct NCQTransferState { AHCIDevice *drive; BlockDriverAIOCB *aiocb; QEMUSGList sglist; + BlockAcctCookie acct; int is_read; uint16_t sector_count; uint64_t lba; diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index fe2fb0b806..c552320122 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -104,17 +104,20 @@ static void cd_data_to_raw(uint8_t *buf, int lba) memset(buf, 0, 288); } -static int cd_read_sector(BlockDriverState *bs, int lba, uint8_t *buf, - int sector_size) +static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size) { int ret; switch(sector_size) { case 
2048: - ret = bdrv_read(bs, (int64_t)lba << 2, buf, 4); + bdrv_acct_start(s->bs, &s->acct, 4 * BDRV_SECTOR_SIZE, BDRV_ACCT_READ); + ret = bdrv_read(s->bs, (int64_t)lba << 2, buf, 4); + bdrv_acct_done(s->bs, &s->acct); break; case 2352: - ret = bdrv_read(bs, (int64_t)lba << 2, buf + 16, 4); + bdrv_acct_start(s->bs, &s->acct, 4 * BDRV_SECTOR_SIZE, BDRV_ACCT_READ); + ret = bdrv_read(s->bs, (int64_t)lba << 2, buf + 16, 4); + bdrv_acct_done(s->bs, &s->acct); if (ret < 0) return ret; cd_data_to_raw(buf, lba); @@ -181,7 +184,7 @@ void ide_atapi_cmd_reply_end(IDEState *s) } else { /* see if a new sector must be read */ if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) { - ret = cd_read_sector(s->bs, s->lba, s->io_buffer, s->cd_sector_size); + ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size); if (ret < 0) { ide_transfer_stop(s); ide_atapi_io_error(s, ret); @@ -250,6 +253,7 @@ static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size) s->io_buffer_index = 0; if (s->atapi_dma) { + bdrv_acct_start(s->bs, &s->acct, size, BDRV_ACCT_READ); s->status = READY_STAT | SEEK_STAT | DRQ_STAT; s->bus->dma->ops->start_dma(s->bus->dma, s, ide_atapi_cmd_read_dma_cb); @@ -322,10 +326,7 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret) s->status = READY_STAT | SEEK_STAT; s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD; ide_set_irq(s->bus); - eot: - s->bus->dma->ops->add_status(s->bus->dma, BM_STATUS_INT); - ide_set_inactive(s); - return; + goto eot; } s->io_buffer_index = 0; @@ -343,9 +344,11 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret) #ifdef DEBUG_AIO printf("aio_read_cd: lba=%u n=%d\n", s->lba, n); #endif + s->bus->dma->iov.iov_base = (void *)(s->io_buffer + data_offset); s->bus->dma->iov.iov_len = n * 4 * 512; qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1); + s->bus->dma->aiocb = bdrv_aio_readv(s->bs, (int64_t)s->lba << 2, &s->bus->dma->qiov, n * 4, 
ide_atapi_cmd_read_dma_cb, s); @@ -355,6 +358,12 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret) ASC_MEDIUM_NOT_PRESENT); goto eot; } + + return; +eot: + bdrv_acct_done(s->bs, &s->acct); + s->bus->dma->ops->add_status(s->bus->dma, BM_STATUS_INT); + ide_set_inactive(s); } /* start a CD-CDROM read command with DMA */ @@ -368,6 +377,8 @@ static void ide_atapi_cmd_read_dma(IDEState *s, int lba, int nb_sectors, s->io_buffer_size = 0; s->cd_sector_size = sector_size; + bdrv_acct_start(s->bs, &s->acct, s->packet_transfer_size, BDRV_ACCT_READ); + /* XXX: check if BUSY_STAT should be set */ s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT; s->bus->dma->ops->start_dma(s->bus->dma, s, diff --git a/hw/ide/core.c b/hw/ide/core.c index d145b19b0c..40abc1edd2 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -473,7 +473,10 @@ void ide_sector_read(IDEState *s) #endif if (n > s->req_nb_sectors) n = s->req_nb_sectors; + + bdrv_acct_start(s->bs, &s->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ); ret = bdrv_read(s->bs, sector_num, s->io_buffer, n); + bdrv_acct_done(s->bs, &s->acct); if (ret != 0) { if (ide_handle_rw_error(s, -ret, BM_STATUS_PIO_RETRY | BM_STATUS_RETRY_READ)) @@ -610,7 +613,10 @@ handle_rw_error: return; eot: - ide_set_inactive(s); + if (s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) { + bdrv_acct_done(s->bs, &s->acct); + } + ide_set_inactive(s); } static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd) @@ -619,6 +625,20 @@ static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd) s->io_buffer_index = 0; s->io_buffer_size = 0; s->dma_cmd = dma_cmd; + + switch (dma_cmd) { + case IDE_DMA_READ: + bdrv_acct_start(s->bs, &s->acct, s->nsector * BDRV_SECTOR_SIZE, + BDRV_ACCT_READ); + break; + case IDE_DMA_WRITE: + bdrv_acct_start(s->bs, &s->acct, s->nsector * BDRV_SECTOR_SIZE, + BDRV_ACCT_WRITE); + break; + default: + break; + } + s->bus->dma->ops->start_dma(s->bus->dma, s, ide_dma_cb); } @@ -641,7 +661,10 @@ 
void ide_sector_write(IDEState *s) n = s->nsector; if (n > s->req_nb_sectors) n = s->req_nb_sectors; + + bdrv_acct_start(s->bs, &s->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_WRITE); /* PIO write path: account as a write, not a read */ ret = bdrv_write(s->bs, sector_num, s->io_buffer, n); + bdrv_acct_done(s->bs, &s->acct); if (ret != 0) { if (ide_handle_rw_error(s, -ret, BM_STATUS_PIO_RETRY)) @@ -685,6 +708,7 @@ static void ide_flush_cb(void *opaque, int ret) } } + bdrv_acct_done(s->bs, &s->acct); s->status = READY_STAT | SEEK_STAT; ide_set_irq(s->bus); } @@ -698,6 +722,7 @@ void ide_flush_cache(IDEState *s) return; } + bdrv_acct_start(s->bs, &s->acct, 0, BDRV_ACCT_FLUSH); acb = bdrv_aio_flush(s->bs, ide_flush_cb, s); if (acb == NULL) { ide_flush_cb(s, -EIO); diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 02e805f070..7f5ef8de1d 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -440,6 +440,7 @@ struct IDEState { int lba; int cd_sector_size; int atapi_dma; /* true if dma is requested for the packet cmd */ + BlockAcctCookie acct; /* ATA DMA state */ int io_buffer_size; QEMUSGList sg; diff --git a/hw/ide/macio.c b/hw/ide/macio.c index 44fb3fef60..fdf5d75082 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -52,8 +52,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret) m->aiocb = NULL; qemu_sglist_destroy(&s->sg); ide_atapi_io_error(s, ret); - io->dma_end(opaque); - return; + goto done; } if (s->io_buffer_size > 0) { @@ -71,8 +70,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret) ide_atapi_cmd_ok(s); if (io->len == 0) { - io->dma_end(opaque); - return; + goto done; } /* launch next transfer */ @@ -92,9 +90,14 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret) /* Note: media not present is the most likely case */ ide_atapi_cmd_error(s, SENSE_NOT_READY, ASC_MEDIUM_NOT_PRESENT); - io->dma_end(opaque); - return; + goto done; } + return; + +done: + bdrv_acct_done(s->bs, &s->acct); + io->dma_end(opaque); + return; } static void pmac_ide_transfer_cb(void *opaque, int ret) 
@@ -109,8 +112,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) m->aiocb = NULL; qemu_sglist_destroy(&s->sg); ide_dma_error(s); - io->dma_end(io); - return; + goto done; } sector_num = ide_get_sector(s); @@ -130,10 +132,8 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) } /* end of DMA ? */ - if (io->len == 0) { - io->dma_end(io); - return; + goto done; } /* launch next transfer */ @@ -163,6 +163,12 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) if (!m->aiocb) pmac_ide_transfer_cb(io, -1); + return; +done: + if (s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) { + bdrv_acct_done(s->bs, &s->acct); + } + io->dma_end(io); } static void pmac_ide_transfer(DBDMA_io *io) @@ -172,10 +178,22 @@ static void pmac_ide_transfer(DBDMA_io *io) s->io_buffer_size = 0; if (s->drive_kind == IDE_CD) { + bdrv_acct_start(s->bs, &s->acct, io->len, BDRV_ACCT_READ); pmac_ide_atapi_transfer_cb(io, 0); return; } + switch (s->dma_cmd) { + case IDE_DMA_READ: + bdrv_acct_start(s->bs, &s->acct, io->len, BDRV_ACCT_READ); + break; + case IDE_DMA_WRITE: + bdrv_acct_start(s->bs, &s->acct, io->len, BDRV_ACCT_WRITE); + break; + default: + break; + } + pmac_ide_transfer_cb(io, 0); } @@ -1811,6 +1811,25 @@ static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id, return next; } +static uint8_t pci_find_capability_at_offset(PCIDevice *pdev, uint8_t offset) +{ + uint8_t next, prev, found = 0; + + if (!(pdev->used[offset])) { + return 0; + } + + assert(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST); + + for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]); + prev = next + PCI_CAP_LIST_NEXT) { + if (next <= offset && next > found) { + found = next; + } + } + return found; +} + /* Patch the PCI vendor and device ids in a PCI rom image if necessary. This is needed for an option rom which is used for more than one device. 
*/ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, int size) @@ -1952,11 +1971,30 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t offset, uint8_t size) { uint8_t *config; + int i, overlapping_cap; + if (!offset) { offset = pci_find_space(pdev, size); if (!offset) { return -ENOSPC; } + } else { + /* Verify that capabilities don't overlap. Note: device assignment + * depends on this check to verify that the device is not broken. + * Should never trigger for emulated devices, but it's helpful + * for debugging these. */ + for (i = offset; i < offset + size; i++) { + overlapping_cap = pci_find_capability_at_offset(pdev, i); + if (overlapping_cap) { + fprintf(stderr, "ERROR: %04x:%02x:%02x.%x " + "Attempt to add PCI capability %x at offset " + "%x overlaps existing capability %x at offset %x\n", + pci_find_domain(pdev->bus), pci_bus_num(pdev->bus), + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + cap_id, offset, overlapping_cap, i); + return -EINVAL; + } + } } config = pdev->config + offset; @@ -175,6 +175,14 @@ static void hotplug_event_notify(PCIDevice *dev) } } +static void hotplug_event_clear(PCIDevice *dev) +{ + hotplug_event_update_event_status(dev); + if (!msix_enabled(dev) && !msi_enabled(dev) && !dev->exp.hpev_notified) { + qemu_set_irq(dev->irq[dev->exp.hpev_intx], 0); + } +} + /* * A PCI Express Hot-Plug Event has occurred, so update slot status register * and notify OS of the event if necessary. 
@@ -320,6 +328,10 @@ void pcie_cap_slot_write_config(PCIDevice *dev, uint8_t *exp_cap = dev->config + pos; uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); + if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { + hotplug_event_clear(dev); + } + if (!ranges_overlap(addr, len, pos + PCI_EXP_SLTCTL, 2)) { return; } diff --git a/hw/pcie_aer.c b/hw/pcie_aer.c index 2ae65ec807..62c06eafd6 100644 --- a/hw/pcie_aer.c +++ b/hw/pcie_aer.c @@ -415,7 +415,7 @@ static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err) int i; assert(err->status); - assert(err->status & (err->status - 1)); + assert(!(err->status & (err->status - 1))); errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP); errcap |= PCI_ERR_CAP_FEP(first_bit); @@ -495,7 +495,7 @@ static int pcie_aer_record_error(PCIDevice *dev, int fep = PCI_ERR_CAP_FEP(errcap); assert(err->status); - assert(err->status & (err->status - 1)); + assert(!(err->status & (err->status - 1))); if (errcap & PCI_ERR_CAP_MHRE && (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) { @@ -979,20 +979,21 @@ int do_pcie_aer_inejct_error(Monitor *mon, if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) { char *e = NULL; error_status = strtoul(error_name, &e, 0); - correctable = !!qdict_get_int(qdict, "correctable"); + correctable = qdict_get_try_bool(qdict, "correctable", 0); if (!e || *e != '\0') { monitor_printf(mon, "invalid error status value. 
\"%s\"", error_name); return -EINVAL; } } + err.status = error_status; err.source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn; err.flags = 0; if (correctable) { err.flags |= PCIE_AER_ERR_IS_CORRECTABLE; } - if (qdict_get_int(qdict, "advisory_non_fatal")) { + if (qdict_get_try_bool(qdict, "advisory_non_fatal", 0)) { err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY; } if (qdict_haskey(qdict, "header0")) { diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index d94b1eb53c..3cc830ff95 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -57,6 +57,7 @@ typedef struct SCSIDiskReq { struct iovec iov; QEMUIOVector qiov; uint32_t status; + BlockAcctCookie acct; } SCSIDiskReq; struct SCSIDiskState @@ -107,10 +108,13 @@ static void scsi_cancel_io(SCSIRequest *req) static void scsi_read_complete(void * opaque, int ret) { SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); int n; r->req.aiocb = NULL; + bdrv_acct_done(s->bs, &r->acct); + if (ret) { if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_READ)) { return; @@ -161,6 +165,8 @@ static void scsi_read_data(SCSIRequest *req) r->iov.iov_len = n * 512; qemu_iovec_init_external(&r->qiov, &r->iov, 1); + + bdrv_acct_start(s->bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ); r->req.aiocb = bdrv_aio_readv(s->bs, r->sector, &r->qiov, n, scsi_read_complete, r); if (r->req.aiocb == NULL) { @@ -207,11 +213,14 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type) static void scsi_write_complete(void * opaque, int ret) { SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); uint32_t len; uint32_t n; r->req.aiocb = NULL; + bdrv_acct_done(s->bs, &r->acct); + if (ret) { if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_WRITE)) { return; @@ -252,6 +261,8 @@ static void scsi_write_data(SCSIRequest *req) n = r->iov.iov_len / 512; if (n) { qemu_iovec_init_external(&r->qiov, &r->iov, 1); + + bdrv_acct_start(s->bs, 
&r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_WRITE); r->req.aiocb = bdrv_aio_writev(s->bs, r->sector, &r->qiov, n, scsi_write_complete, r); if (r->req.aiocb == NULL) { @@ -854,13 +865,19 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf) buflen = 8; break; case SYNCHRONIZE_CACHE: + { + BlockAcctCookie acct; + + bdrv_acct_start(s->bs, &acct, 0, BDRV_ACCT_FLUSH); ret = bdrv_flush(s->bs); + bdrv_acct_done(s->bs, &acct); if (ret < 0) { if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_FLUSH)) { return -1; } } break; + } case GET_CONFIGURATION: memset(outbuf, 0, 8); /* ??? This should probably return much more information. For now diff --git a/hw/sh_pci.c b/hw/sh_pci.c index 76061bb756..36f39300d5 100644 --- a/hw/sh_pci.c +++ b/hw/sh_pci.c @@ -150,7 +150,7 @@ static int sh_pci_init_device(SysBusDevice *dev) PCI_DEVFN(0, 0), 4); memory_region_init_io(&s->memconfig_p4, &sh_pci_reg_ops, s, "sh_pci", 0x224); - memory_region_init_alias(&s->memconfig_a7, "sh_pci.2", &s->memconfig_a7, + memory_region_init_alias(&s->memconfig_a7, "sh_pci.2", &s->memconfig_p4, 0, 0x224); isa_mmio_setup(&s->isa, 0x40000); sysbus_init_mmio_cb2(dev, sh_pci_map, sh_pci_unmap); diff --git a/hw/vhost.c b/hw/vhost.c index 18860678ba..0870cb7d85 100644 --- a/hw/vhost.c +++ b/hw/vhost.c @@ -515,11 +515,6 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, }; struct VirtQueue *vvq = virtio_get_queue(vdev, idx); - if (!vdev->binding->set_host_notifier) { - fprintf(stderr, "binding does not support host notifiers\n"); - return -ENOSYS; - } - vq->num = state.num = virtio_queue_get_num(vdev, idx); r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state); if (r) { @@ -567,12 +562,6 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, r = -errno; goto fail_alloc; } - r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true); - if (r < 0) { - fprintf(stderr, "Error binding host notifier: %d\n", -r); - goto fail_host_notifier; - } - file.fd = 
event_notifier_get_fd(virtio_queue_get_host_notifier(vvq)); r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); if (r) { @@ -591,8 +580,6 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, fail_call: fail_kick: - vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false); -fail_host_notifier: fail_alloc: cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx), 0, 0); @@ -618,12 +605,6 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev, .index = idx, }; int r; - r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false); - if (r < 0) { - fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r); - fflush(stderr); - } - assert (r >= 0); r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state); if (r < 0) { fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r); @@ -697,6 +678,60 @@ bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev) hdev->force; } +/* Stop processing guest IO notifications in qemu. + * Start processing them in vhost in kernel. + */ +int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) +{ + int i, r, e; /* e: rollback status, kept apart so r still holds the bind error */ + if (!vdev->binding->set_host_notifier) { + fprintf(stderr, "binding does not support host notifiers\n"); + r = -ENOSYS; + goto fail; + } + + for (i = 0; i < hdev->nvqs; ++i) { + r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true); + if (r < 0) { + fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r); + goto fail_vq; + } + } + + return 0; +fail_vq: + while (--i >= 0) { + e = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false); + if (e < 0) { + fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -e); + fflush(stderr); + } + assert (e >= 0); + } +fail: + return r; +} + +/* Stop processing guest IO notifications in vhost. + * Start processing them in qemu. + * This might actually run the qemu handlers right away, + * so virtio in qemu must be completely setup when this is called. 
+ */ +void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) +{ + int i, r; + + for (i = 0; i < hdev->nvqs; ++i) { + r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false); + if (r < 0) { + fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r); + fflush(stderr); + } + assert (r >= 0); + } +} + +/* Host notifiers must be enabled at this point. */ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) { int i, r; @@ -762,6 +797,7 @@ fail: return r; } +/* Host notifiers must be enabled at this point. */ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) { int i, r; diff --git a/hw/vhost.h b/hw/vhost.h index c8c595a147..c9452f0732 100644 --- a/hw/vhost.h +++ b/hw/vhost.h @@ -46,5 +46,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev); bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev); int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); +int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); +void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); #endif diff --git a/hw/vhost_net.c b/hw/vhost_net.c index a55981200d..950a6b8d99 100644 --- a/hw/vhost_net.c +++ b/hw/vhost_net.c @@ -139,16 +139,22 @@ int vhost_net_start(struct vhost_net *net, { struct vhost_vring_file file = { }; int r; + + net->dev.nvqs = 2; + net->dev.vqs = net->vqs; + + r = vhost_dev_enable_notifiers(&net->dev, dev); + if (r < 0) { + goto fail_notifiers; + } if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) { tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr_mrg_rxbuf)); } - net->dev.nvqs = 2; - net->dev.vqs = net->vqs; r = vhost_dev_start(&net->dev, dev); if (r < 0) { - return r; + goto fail_start; } net->vc->info->poll(net->vc, false); @@ -173,6 +179,9 @@ fail: if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) { tap_set_vnet_hdr_len(net->vc, sizeof(struct 
virtio_net_hdr)); } +fail_start: + vhost_dev_disable_notifiers(&net->dev, dev); +fail_notifiers: return r; } @@ -190,6 +199,7 @@ void vhost_net_stop(struct vhost_net *net, if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) { tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr)); } + vhost_dev_disable_notifiers(&net->dev, dev); } void vhost_net_cleanup(struct vhost_net *net) diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index dad8c0a6a2..2a8ccd0aa9 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -47,6 +47,7 @@ typedef struct VirtIOBlockReq struct virtio_scsi_inhdr *scsi; QEMUIOVector qiov; struct VirtIOBlockReq *next; + BlockAcctCookie acct; } VirtIOBlockReq; static void virtio_blk_req_complete(VirtIOBlockReq *req, int status) @@ -58,8 +59,6 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status) stb_p(&req->in->status, status); virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); virtio_notify(&s->vdev, s->vq); - - g_free(req); } static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, @@ -81,6 +80,8 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, vm_stop(VMSTOP_DISKFULL); } else { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); + bdrv_acct_done(s->bs, &req->acct); + g_free(req); bdrv_mon_event(s->bs, BDRV_ACTION_REPORT, is_read); } @@ -100,6 +101,8 @@ static void virtio_blk_rw_complete(void *opaque, int ret) } virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + bdrv_acct_done(req->dev->bs, &req->acct); + g_free(req); } static void virtio_blk_flush_complete(void *opaque, int ret) @@ -113,6 +116,8 @@ static void virtio_blk_flush_complete(void *opaque, int ret) } virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + bdrv_acct_done(req->dev->bs, &req->acct); + g_free(req); } static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) @@ -155,6 +160,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) */ if (req->elem.out_num < 2 || req->elem.in_num < 3) { 
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); + g_free(req); return; } @@ -163,6 +169,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) */ if (req->elem.out_num > 2 && req->elem.in_num > 3) { virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); + g_free(req); return; } @@ -229,11 +236,13 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) stl_p(&req->scsi->data_len, hdr.dxfer_len); virtio_blk_req_complete(req, status); + g_free(req); } #else static void virtio_blk_handle_scsi(VirtIOBlockReq *req) { virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); + g_free(req); } #endif /* __linux__ */ @@ -266,6 +275,8 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb) { BlockDriverAIOCB *acb; + bdrv_acct_start(req->dev->bs, &req->acct, 0, BDRV_ACCT_FLUSH); + /* * Make sure all outstanding writes are posted to the backing device. */ @@ -284,6 +295,8 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb) sector = ldq_p(&req->out->sector); + bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE); + trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512); if (sector & req->dev->sector_mask) { @@ -317,6 +330,8 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req) sector = ldq_p(&req->out->sector); + bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ); + if (sector & req->dev->sector_mask) { virtio_blk_rw_complete(req, -EIO); return; @@ -370,6 +385,7 @@ static void virtio_blk_handle_request(VirtIOBlockReq *req, s->serial ? 
s->serial : "", MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES)); virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + g_free(req); } else if (type & VIRTIO_BLK_T_OUT) { qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1], req->elem.out_num - 1); diff --git a/hw/xen_disk.c b/hw/xen_disk.c index 31f91514f2..bd5c66916b 100644 --- a/hw/xen_disk.c +++ b/hw/xen_disk.c @@ -79,6 +79,7 @@ struct ioreq { struct XenBlkDev *blkdev; QLIST_ENTRY(ioreq) list; + BlockAcctCookie acct; }; struct XenBlkDev { @@ -401,6 +402,7 @@ static void qemu_aio_complete(void *opaque, int ret) ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY; ioreq_unmap(ioreq); ioreq_finish(ioreq); + bdrv_acct_done(ioreq->blkdev->bs, &ioreq->acct); qemu_bh_schedule(ioreq->blkdev->bh); } @@ -419,6 +421,7 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq) switch (ioreq->req.operation) { case BLKIF_OP_READ: + bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_READ); ioreq->aio_inflight++; bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE, &ioreq->v, ioreq->v.size / BLOCK_SIZE, @@ -429,6 +432,8 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq) if (!ioreq->req.nr_segments) { break; } + + bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_WRITE); ioreq->aio_inflight++; bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE, &ioreq->v, ioreq->v.size / BLOCK_SIZE, |