diff options
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | Makefile.objs | 3 | ||||
-rw-r--r-- | arch_init.c | 35 | ||||
-rw-r--r-- | buffered_file.c | 9 | ||||
-rw-r--r-- | cpu-common.h | 3 | ||||
-rw-r--r-- | exec.c | 24 | ||||
-rw-r--r-- | hw/cirrus_vga.c | 4 | ||||
-rw-r--r-- | hw/e1000.c | 4 | ||||
-rw-r--r-- | hw/eepro100.c | 14 | ||||
-rw-r--r-- | hw/hw.h | 8 | ||||
-rw-r--r-- | hw/ide/via.c | 2 | ||||
-rw-r--r-- | hw/ioh3420.c | 80 | ||||
-rw-r--r-- | hw/lsi53c895a.c | 7 | ||||
-rw-r--r-- | hw/openpic.c | 2 | ||||
-rw-r--r-- | hw/pci.c | 174 | ||||
-rw-r--r-- | hw/pci.h | 1 | ||||
-rw-r--r-- | hw/pci_bridge.c | 11 | ||||
-rw-r--r-- | hw/pcie.h | 14 | ||||
-rw-r--r-- | hw/pcie_aer.c | 815 | ||||
-rw-r--r-- | hw/pcie_aer.h | 106 | ||||
-rw-r--r-- | hw/pcie_port.c | 8 | ||||
-rw-r--r-- | hw/pcie_regs.h | 2 | ||||
-rw-r--r-- | hw/qdev.c | 87 | ||||
-rw-r--r-- | hw/qdev.h | 18 | ||||
-rw-r--r-- | hw/usb-ohci.c | 2 | ||||
-rw-r--r-- | hw/vhost.c | 1 | ||||
-rw-r--r-- | hw/virtio.c | 27 | ||||
-rw-r--r-- | hw/xio3130_downstream.c | 43 | ||||
-rw-r--r-- | hw/xio3130_upstream.c | 33 | ||||
-rw-r--r-- | kvm-all.c | 2 | ||||
-rw-r--r-- | migration.c | 6 | ||||
-rw-r--r-- | pc-bios/README | 2 | ||||
-rw-r--r-- | pc-bios/gpxe-eepro100-80861229.rom | bin | 56832 -> 0 bytes | |||
-rw-r--r-- | qemu-common.h | 3 | ||||
-rw-r--r-- | savevm.c | 4 | ||||
-rw-r--r-- | vl.c | 1 |
36 files changed, 1446 insertions, 110 deletions
@@ -208,7 +208,6 @@ BLOBS=bios.bin vgabios.bin vgabios-cirrus.bin \ vgabios-stdvga.bin vgabios-vmware.bin \ ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc \ gpxe-eepro100-80861209.rom \ -gpxe-eepro100-80861229.rom \ pxe-e1000.bin \ pxe-ne2k_pci.bin pxe-pcnet.bin \ pxe-rtl8139.bin pxe-virtio.bin \ diff --git a/Makefile.objs b/Makefile.objs index 13ba26fdcb..257623bce9 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -217,7 +217,8 @@ hw-obj-$(CONFIG_PIIX4) += piix4.o # PCI watchdog devices hw-obj-$(CONFIG_PCI) += wdt_i6300esb.o -hw-obj-$(CONFIG_PCI) += pcie.o pcie_port.o +hw-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o +hw-obj-$(CONFIG_PCI) += msix.o msi.o # PCI network cards hw-obj-$(CONFIG_NE2000_PCI) += ne2000.o diff --git a/arch_init.c b/arch_init.c index 44869255ef..e32e289c8f 100644 --- a/arch_init.c +++ b/arch_init.c @@ -23,6 +23,7 @@ */ #include <stdint.h> #include <stdarg.h> +#include <stdlib.h> #ifndef _WIN32 #include <sys/types.h> #include <sys/mman.h> @@ -212,6 +213,39 @@ uint64_t ram_bytes_total(void) return total; } +static int block_compar(const void *a, const void *b) +{ + RAMBlock * const *ablock = a; + RAMBlock * const *bblock = b; + if ((*ablock)->offset < (*bblock)->offset) { + return -1; + } else if ((*ablock)->offset > (*bblock)->offset) { + return 1; + } + return 0; +} + +static void sort_ram_list(void) +{ + RAMBlock *block, *nblock, **blocks; + int n; + n = 0; + QLIST_FOREACH(block, &ram_list.blocks, next) { + ++n; + } + blocks = qemu_malloc(n * sizeof *blocks); + n = 0; + QLIST_FOREACH_SAFE(block, &ram_list.blocks, next, nblock) { + blocks[n++] = block; + QLIST_REMOVE(block, next); + } + qsort(blocks, n, sizeof *blocks, block_compar); + while (--n >= 0) { + QLIST_INSERT_HEAD(&ram_list.blocks, blocks[n], next); + } + qemu_free(blocks); +} + int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) { ram_addr_t addr; @@ -234,6 +268,7 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) bytes_transferred = 0; last_block = NULL; last_offset = 0; + sort_ram_list(); /* Make sure all dirty bits are set */ QLIST_FOREACH(block, &ram_list.blocks, next) { diff --git a/buffered_file.c b/buffered_file.c index 1836e7e242..8435a31946 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -206,20 +206,23 @@ static int buffered_rate_limit(void *opaque) return 0; } -static size_t buffered_set_rate_limit(void *opaque, size_t new_rate) +static int64_t buffered_set_rate_limit(void *opaque, int64_t new_rate) { QEMUFileBuffered *s = opaque; - if (s->has_error) goto out; + if (new_rate > SIZE_MAX) { + new_rate = SIZE_MAX; + } + s->xfer_limit = new_rate / 10; out: return s->xfer_limit; } -static size_t buffered_get_rate_limit(void *opaque) +static int64_t buffered_get_rate_limit(void *opaque) { QEMUFileBuffered *s = opaque; diff --git a/cpu-common.h b/cpu-common.h index a543b5d7cf..bb6b137e16 100644 --- a/cpu-common.h +++ b/cpu-common.h @@ -46,6 +46,9 @@ ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size); void qemu_ram_free(ram_addr_t addr); /* This should only be used for ram local to a device. */ void *qemu_get_ram_ptr(ram_addr_t addr); +/* Same but slower, to use for migration, where the order of + * RAMBlocks must not change. */ +void *qemu_safe_ram_ptr(ram_addr_t addr); /* This should not be used by devices. */ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr); ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr); @@ -2030,10 +2030,10 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end, /* we modify the TLB cache so that the dirty bit will be set again when accessing the range */ - start1 = (unsigned long)qemu_get_ram_ptr(start); + start1 = (unsigned long)qemu_safe_ram_ptr(start); /* Chek that we don't span multiple blocks - this breaks the address comparisons below. */ - if ((unsigned long)qemu_get_ram_ptr(end - 1) - start1 + if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1 != (end - 1) - start) { abort(); } @@ -2858,6 +2858,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, new_block->length = size; QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); + fprintf(stderr, "alloc ram %s len 0x%x\n", new_block->idstr, (int)new_block->length); ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, last_ram_offset() >> TARGET_PAGE_BITS); @@ -2931,6 +2932,25 @@ void *qemu_get_ram_ptr(ram_addr_t addr) return NULL; } +/* Return a host pointer to ram allocated with qemu_ram_alloc. + * Same as qemu_get_ram_ptr but avoid reordering ramblocks. + */ +void *qemu_safe_ram_ptr(ram_addr_t addr) +{ + RAMBlock *block; + + QLIST_FOREACH(block, &ram_list.blocks, next) { + if (addr - block->offset < block->length) { + return block->host + (addr - block->offset); + } + } + + fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); + abort(); + + return NULL; +} + int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) { RAMBlock *block; diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c index aadc56f692..40be55d1b1 100644 --- a/hw/cirrus_vga.c +++ b/hw/cirrus_vga.c @@ -3204,10 +3204,10 @@ static int pci_cirrus_vga_initfn(PCIDevice *dev) /* memory #0 LFB */ /* memory #1 memory-mapped I/O */ /* XXX: s->vga.vram_size must be a power of two */ - pci_register_bar((PCIDevice *)d, 0, 0x2000000, + pci_register_bar(&d->dev, 0, 0x2000000, PCI_BASE_ADDRESS_MEM_PREFETCH, cirrus_pci_lfb_map); if (device_id == CIRRUS_ID_CLGD5446) { - pci_register_bar((PCIDevice *)d, 1, CIRRUS_PNPMMIO_SIZE, + pci_register_bar(&d->dev, 1, CIRRUS_PNPMMIO_SIZE, PCI_BASE_ADDRESS_SPACE_MEMORY, cirrus_pci_mmio_map); } return 0; diff --git a/hw/e1000.c b/hw/e1000.c index 7811699ea9..57d08cfa35 100644 --- a/hw/e1000.c +++ b/hw/e1000.c @@ -1133,10 +1133,10 @@ static int pci_e1000_init(PCIDevice *pci_dev) d->mmio_index = cpu_register_io_memory(e1000_mmio_read, e1000_mmio_write, d); - pci_register_bar((PCIDevice *)d, 0, PNPMMIO_SIZE, + pci_register_bar(&d->dev, 0, PNPMMIO_SIZE, PCI_BASE_ADDRESS_SPACE_MEMORY, e1000_mmio_map); - pci_register_bar((PCIDevice *)d, 1, IOPORT_SIZE, + pci_register_bar(&d->dev, 1, IOPORT_SIZE, PCI_BASE_ADDRESS_SPACE_IO, ioport_map); memmove(d->eeprom_data, e1000_eeprom_template, diff --git a/hw/eepro100.c b/hw/eepro100.c index 41d792ad24..f8a700a2b8 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -2048,17 +2048,9 @@ static void eepro100_register_devices(void) size_t i; for (i = 0; i < ARRAY_SIZE(e100_devices); i++) { PCIDeviceInfo *pci_dev = &e100_devices[i].pci; - switch (e100_devices[i].device_id) { - case PCI_DEVICE_ID_INTEL_82551IT: - pci_dev->romfile = "gpxe-eepro100-80861209.rom"; - break; - case PCI_DEVICE_ID_INTEL_82557: - pci_dev->romfile = "gpxe-eepro100-80861229.rom"; - break; - case 0x2449: - pci_dev->romfile = "gpxe-eepro100-80862449.rom"; - break; - } + /* We use the same rom file for all device ids. + QEMU fixes the device id during rom load. */ + pci_dev->romfile = "gpxe-eepro100-80861209.rom"; pci_dev->init = e100_nic_init; pci_dev->exit = pci_nic_uninit; pci_dev->qdev.props = e100_properties; @@ -39,8 +39,8 @@ typedef int (QEMUFileRateLimit)(void *opaque); * the new actual bandwidth. It should be new_rate if everything goes ok, and * the old rate otherwise */ -typedef size_t (QEMUFileSetRateLimit)(void *opaque, size_t new_rate); -typedef size_t (QEMUFileGetRateLimit)(void *opaque); +typedef int64_t (QEMUFileSetRateLimit)(void *opaque, int64_t new_rate); +typedef int64_t (QEMUFileGetRateLimit)(void *opaque); QEMUFile *qemu_fopen_ops(void *opaque, QEMUFilePutBufferFunc *put_buffer, QEMUFileGetBufferFunc *get_buffer, @@ -83,8 +83,8 @@ unsigned int qemu_get_be16(QEMUFile *f); unsigned int qemu_get_be32(QEMUFile *f); uint64_t qemu_get_be64(QEMUFile *f); int qemu_file_rate_limit(QEMUFile *f); -size_t qemu_file_set_rate_limit(QEMUFile *f, size_t new_rate); -size_t qemu_file_get_rate_limit(QEMUFile *f); +int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); +int64_t qemu_file_get_rate_limit(QEMUFile *f); int qemu_file_has_error(QEMUFile *f); void qemu_file_set_error(QEMUFile *f); diff --git a/hw/ide/via.c b/hw/ide/via.c index 3e41d005b1..66be0c4cce 100644 --- a/hw/ide/via.c +++ b/hw/ide/via.c @@ -149,7 +149,7 @@ static int vt82c686b_ide_initfn(PCIDevice *dev) pci_set_long(pci_conf + PCI_CAPABILITY_LIST, 0x000000c0); qemu_register_reset(via_reset, d); - pci_register_bar((PCIDevice *)d, 4, 0x10, + pci_register_bar(&d->dev, 4, 0x10, PCI_BASE_ADDRESS_SPACE_IO, bmdma_map); vmstate_register(&dev->qdev, 0, &vmstate_ide_pci, d); diff --git a/hw/ioh3420.c b/hw/ioh3420.c index 3cc129f50b..95adf0978f 100644 --- a/hw/ioh3420.c +++ b/hw/ioh3420.c @@ -36,25 +36,59 @@ #define IOH_EP_EXP_OFFSET 0x90 #define IOH_EP_AER_OFFSET 0x100 +/* + * If two MSI vector are allocated, Advanced Error Interrupt Message Number + * is 1. otherwise 0. + * 17.12.5.10 RPERRSTS, 32:27 bit Advanced Error Interrupt Message Number. + */ +static uint8_t ioh3420_aer_vector(const PCIDevice *d) +{ + switch (msi_nr_vectors_allocated(d)) { + case 1: + return 0; + case 2: + return 1; + case 4: + case 8: + case 16: + case 32: + default: + break; + } + abort(); + return 0; +} + +static void ioh3420_aer_vector_update(PCIDevice *d) +{ + pcie_aer_root_set_vector(d, ioh3420_aer_vector(d)); +} + static void ioh3420_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { + uint32_t root_cmd = + pci_get_long(d->config + d->exp.aer_cap + PCI_ERR_ROOT_COMMAND); + pci_bridge_write_config(d, address, val, len); msi_write_config(d, address, val, len); + ioh3420_aer_vector_update(d); pcie_cap_slot_write_config(d, address, val, len); - /* TODO: AER */ + pcie_aer_write_config(d, address, val, len); + pcie_aer_root_write_config(d, address, val, len, root_cmd); } static void ioh3420_reset(DeviceState *qdev) { PCIDevice *d = DO_UPCAST(PCIDevice, qdev, qdev); msi_reset(d); + ioh3420_aer_vector_update(d); pcie_cap_root_reset(d); pcie_cap_deverr_reset(d); pcie_cap_slot_reset(d); + pcie_aer_root_reset(d); pci_bridge_reset(qdev); pci_bridge_disable_base_limit(d); - /* TODO: AER */ } static int ioh3420_initfn(PCIDevice *d) @@ -63,6 +97,7 @@ static int ioh3420_initfn(PCIDevice *d) PCIEPort *p = DO_UPCAST(PCIEPort, br, br); PCIESlot *s = DO_UPCAST(PCIESlot, port, p); int rc; + int tmp; rc = pci_bridge_initfn(d); if (rc < 0) { @@ -78,35 +113,57 @@ static int ioh3420_initfn(PCIDevice *d) rc = pci_bridge_ssvid_init(d, IOH_EP_SSVID_OFFSET, IOH_EP_SSVID_SVID, IOH_EP_SSVID_SSID); if (rc < 0) { - return rc; + goto err_bridge; } rc = msi_init(d, IOH_EP_MSI_OFFSET, IOH_EP_MSI_NR_VECTOR, IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT, IOH_EP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT); if (rc < 0) { - return rc; + goto err_bridge; } rc = pcie_cap_init(d, IOH_EP_EXP_OFFSET, PCI_EXP_TYPE_ROOT_PORT, p->port); if (rc < 0) { - return rc; + goto err_msi; } pcie_cap_deverr_init(d); pcie_cap_slot_init(d, s->slot); pcie_chassis_create(s->chassis); rc = pcie_chassis_add_slot(s); if (rc < 0) { + goto err_pcie_cap; return rc; } pcie_cap_root_init(d); - /* TODO: AER */ + rc = pcie_aer_init(d, IOH_EP_AER_OFFSET); + if (rc < 0) { + goto err; + } + pcie_aer_root_init(d); + ioh3420_aer_vector_update(d); return 0; + +err: + pcie_chassis_del_slot(s); +err_pcie_cap: + pcie_cap_exit(d); +err_msi: + msi_uninit(d); +err_bridge: + tmp = pci_bridge_exitfn(d); + assert(!tmp); + return rc; } static int ioh3420_exitfn(PCIDevice *d) { - /* TODO: AER */ - msi_uninit(d); + PCIBridge* br = DO_UPCAST(PCIBridge, dev, d); + PCIEPort *p = DO_UPCAST(PCIEPort, br, br); + PCIESlot *s = DO_UPCAST(PCIESlot, port, p); + + pcie_aer_exit(d); + pcie_chassis_del_slot(s); pcie_cap_exit(d); + msi_uninit(d); return pci_bridge_exitfn(d); } @@ -142,7 +199,8 @@ static const VMStateDescription vmstate_ioh3420 = { .post_load = pcie_cap_slot_post_load, .fields = (VMStateField[]) { VMSTATE_PCIE_DEVICE(port.br.dev, PCIESlot), - /* TODO: AER */ + VMSTATE_STRUCT(port.br.dev.exp.aer_log, PCIESlot, 0, + vmstate_pcie_aer_log, PCIEAERLog), VMSTATE_END_OF_LIST() } }; @@ -164,7 +222,9 @@ static PCIDeviceInfo ioh3420_info = { DEFINE_PROP_UINT8("port", PCIESlot, port.port, 0), DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), - /* TODO: AER */ + DEFINE_PROP_UINT16("aer_log_max", PCIESlot, + port.br.dev.exp.aer_log.log_max, + PCIE_AER_LOG_MAX_DEFAULT), DEFINE_PROP_END_OF_LIST(), } }; diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c index f97335eaa9..1aef62f9a6 100644 --- a/hw/lsi53c895a.c +++ b/hw/lsi53c895a.c @@ -2177,12 +2177,11 @@ static int lsi_scsi_init(PCIDevice *dev) s->ram_io_addr = cpu_register_io_memory(lsi_ram_readfn, lsi_ram_writefn, s); - /* TODO: use dev and get rid of cast below */ - pci_register_bar((struct PCIDevice *)s, 0, 256, + pci_register_bar(&s->dev, 0, 256, PCI_BASE_ADDRESS_SPACE_IO, lsi_io_mapfunc); - pci_register_bar((struct PCIDevice *)s, 1, 0x400, + pci_register_bar(&s->dev, 1, 0x400, PCI_BASE_ADDRESS_SPACE_MEMORY, lsi_mmio_mapfunc); - pci_register_bar((struct PCIDevice *)s, 2, 0x2000, + pci_register_bar(&s->dev, 2, 0x2000, PCI_BASE_ADDRESS_SPACE_MEMORY, lsi_ram_mapfunc); QTAILQ_INIT(&s->queue); diff --git a/hw/openpic.c b/hw/openpic.c index 01bf15fc3b..f6b8f21272 100644 --- a/hw/openpic.c +++ b/hw/openpic.c @@ -1197,7 +1197,7 @@ qemu_irq *openpic_init (PCIBus *bus, int *pmem_index, int nb_cpus, pci_conf[0x3d] = 0x00; // no interrupt pin /* Register I/O spaces */ - pci_register_bar((PCIDevice *)opp, 0, 0x40000, + pci_register_bar(&opp->pci_dev, 0, 0x40000, PCI_BASE_ADDRESS_SPACE_MEMORY, &openpic_map); } else { opp = qemu_mallocz(sizeof(openpic_t)); @@ -43,12 +43,14 @@ static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent); static char *pcibus_get_dev_path(DeviceState *dev); +static int pcibus_reset(BusState *qbus); struct BusInfo pci_bus_info = { .name = "PCI", .size = sizeof(PCIBus), .print_dev = pcibus_dev_print, .get_dev_path = pcibus_get_dev_path, + .reset = pcibus_reset, .props = (Property[]) { DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), DEFINE_PROP_STRING("romfile", PCIDevice, romfile), @@ -61,7 +63,7 @@ struct BusInfo pci_bus_info = { static void pci_update_mappings(PCIDevice *d); static void pci_set_irq(void *opaque, int irq_num, int level); -static int pci_add_option_rom(PCIDevice *pdev); +static int pci_add_option_rom(PCIDevice *pdev, bool is_default_rom); static void pci_del_option_rom(PCIDevice *pdev); static uint16_t pci_default_sub_vendor_id = PCI_SUBVENDOR_ID_REDHAT_QUMRANET; @@ -136,6 +138,11 @@ static void pci_update_irq_status(PCIDevice *dev) static void pci_device_reset(PCIDevice *dev) { int r; + /* TODO: call the below unconditionally once all pci devices + * are qdevified */ + if (dev->qdev.info) { + qdev_reset_all(&dev->qdev); + } dev->irq_state = 0; pci_update_irq_status(dev); @@ -143,6 +150,9 @@ static void pci_device_reset(PCIDevice *dev) pci_word_test_and_clear_mask(dev->config + PCI_COMMAND, pci_get_word(dev->wmask + PCI_COMMAND) | pci_get_word(dev->w1cmask + PCI_COMMAND)); + pci_word_test_and_clear_mask(dev->config + PCI_STATUS, + pci_get_word(dev->wmask + PCI_STATUS) | + pci_get_word(dev->w1cmask + PCI_STATUS)); dev->config[PCI_CACHE_LINE_SIZE] = 0x0; dev->config[PCI_INTERRUPT_LINE] = 0x0; for (r = 0; r < PCI_NUM_REGIONS; ++r) { @@ -161,9 +171,12 @@ static void pci_device_reset(PCIDevice *dev) pci_update_mappings(dev); } -static void pci_bus_reset(void *opaque) +/* + * Trigger pci bus reset under a given bus. + * To be called on RST# assert. + */ +void pci_bus_reset(PCIBus *bus) { - PCIBus *bus = opaque; int i; for (i = 0; i < bus->nirq; i++) { @@ -176,6 +189,15 @@ static void pci_bus_reset(void *opaque) } } +static int pcibus_reset(BusState *qbus) +{ + pci_bus_reset(DO_UPCAST(PCIBus, qbus, qbus)); + + /* topology traverse is done by pci_bus_reset(). + Tell qbus/qdev walker not to traverse the tree */ + return 1; +} + static void pci_host_bus_register(int domain, PCIBus *bus) { struct PCIHostBus *host; @@ -230,7 +252,6 @@ void pci_bus_new_inplace(PCIBus *bus, DeviceState *parent, pci_host_bus_register(0, bus); /* for now only pci domain 0 is supported */ vmstate_register(NULL, -1, &vmstate_pcibus, bus); - qemu_register_reset(pci_bus_reset, bus); } PCIBus *pci_bus_new(DeviceState *parent, const char *name, int devfn_min) @@ -552,6 +573,18 @@ static void pci_init_wmask(PCIDevice *dev) config_size - PCI_CONFIG_HEADER_SIZE); } +static void pci_init_w1cmask(PCIDevice *dev) +{ + /* + * Note: It's okay to set w1cmask even for readonly bits as + * long as their value is hardwired to 0. + */ + pci_set_word(dev->w1cmask + PCI_STATUS, + PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT | + PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT | + PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY); +} + static void pci_init_wmask_bridge(PCIDevice *d) { /* PCI_PRIMARY_BUS, PCI_SECONDARY_BUS, PCI_SUBORDINATE_BUS and @@ -573,7 +606,29 @@ static void pci_init_wmask_bridge(PCIDevice *d) /* PCI_PREF_BASE_UPPER32 and PCI_PREF_LIMIT_UPPER32 */ memset(d->wmask + PCI_PREF_BASE_UPPER32, 0xff, 8); - pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, 0xffff); +/* TODO: add this define to pci_regs.h in linux and then in qemu. */ +#define PCI_BRIDGE_CTL_VGA_16BIT 0x10 /* VGA 16-bit decode */ +#define PCI_BRIDGE_CTL_DISCARD 0x100 /* Primary discard timer */ +#define PCI_BRIDGE_CTL_SEC_DISCARD 0x200 /* Secondary discard timer */ +#define PCI_BRIDGE_CTL_DISCARD_STATUS 0x400 /* Discard timer status */ +#define PCI_BRIDGE_CTL_DISCARD_SERR 0x800 /* Discard timer SERR# enable */ + pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, + PCI_BRIDGE_CTL_PARITY | + PCI_BRIDGE_CTL_SERR | + PCI_BRIDGE_CTL_ISA | + PCI_BRIDGE_CTL_VGA | + PCI_BRIDGE_CTL_VGA_16BIT | + PCI_BRIDGE_CTL_MASTER_ABORT | + PCI_BRIDGE_CTL_BUS_RESET | + PCI_BRIDGE_CTL_FAST_BACK | + PCI_BRIDGE_CTL_DISCARD | + PCI_BRIDGE_CTL_SEC_DISCARD | + PCI_BRIDGE_CTL_DISCARD_STATUS | + PCI_BRIDGE_CTL_DISCARD_SERR); + /* Below does not do anything as we never set this bit, put here for + * completeness. */ + pci_set_word(d->w1cmask + PCI_BRIDGE_CONTROL, + PCI_BRIDGE_CTL_DISCARD_STATUS); } static int pci_init_multifunction(PCIBus *bus, PCIDevice *dev) @@ -676,6 +731,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, } pci_init_cmask(pci_dev); pci_init_wmask(pci_dev); + pci_init_w1cmask(pci_dev); if (is_bridge) { pci_init_wmask_bridge(pci_dev); } @@ -1484,6 +1540,16 @@ void pci_bridge_update_mappings(PCIBus *b) } } +/* Whether a given bus number is in range of the secondary + * bus of the given bridge device. */ +static bool pci_secondary_bus_in_range(PCIDevice *dev, int bus_num) +{ + return !(pci_get_word(dev->config + PCI_BRIDGE_CONTROL) & + PCI_BRIDGE_CTL_BUS_RESET) /* Don't walk the bus if it's reset. */ && + dev->config[PCI_SECONDARY_BUS] < bus_num && + bus_num <= dev->config[PCI_SUBORDINATE_BUS]; +} + PCIBus *pci_find_bus(PCIBus *bus, int bus_num) { PCIBus *sec; @@ -1496,20 +1562,21 @@ PCIBus *pci_find_bus(PCIBus *bus, int bus_num) return bus; } + /* Consider all bus numbers in range for the host pci bridge. */ + if (bus->parent_dev && + !pci_secondary_bus_in_range(bus->parent_dev, bus_num)) { + return NULL; + } + /* try child bus */ - if (!bus->parent_dev /* host pci bridge */ || - (bus->parent_dev->config[PCI_SECONDARY_BUS] < bus_num && - bus_num <= bus->parent_dev->config[PCI_SUBORDINATE_BUS])) { - for (; bus; bus = sec) { - QLIST_FOREACH(sec, &bus->child, sibling) { - assert(sec->parent_dev); - if (sec->parent_dev->config[PCI_SECONDARY_BUS] == bus_num) { - return sec; - } - if (sec->parent_dev->config[PCI_SECONDARY_BUS] < bus_num && - bus_num <= sec->parent_dev->config[PCI_SUBORDINATE_BUS]) { - break; - } + for (; bus; bus = sec) { + QLIST_FOREACH(sec, &bus->child, sibling) { + assert(sec->parent_dev); + if (sec->parent_dev->config[PCI_SECONDARY_BUS] == bus_num) { + return sec; + } + if (pci_secondary_bus_in_range(sec->parent_dev, bus_num)) { + break; } } } @@ -1533,6 +1600,7 @@ static int pci_qdev_init(DeviceState *qdev, DeviceInfo *base) PCIDeviceInfo *info = container_of(base, PCIDeviceInfo, qdev); PCIBus *bus; int devfn, rc; + bool is_default_rom; /* initialize cap_present for pci_is_express() and pci_config_size() */ if (info->is_express) { @@ -1553,9 +1621,12 @@ static int pci_qdev_init(DeviceState *qdev, DeviceInfo *base) } /* rom loading */ - if (pci_dev->romfile == NULL && info->romfile != NULL) + is_default_rom = false; + if (pci_dev->romfile == NULL && info->romfile != NULL) { pci_dev->romfile = qemu_strdup(info->romfile); - pci_add_option_rom(pci_dev); + is_default_rom = true; + } + pci_add_option_rom(pci_dev, is_default_rom); if (bus->hotplug) { /* Let buses differentiate between hotplug and when device is @@ -1663,8 +1734,64 @@ static void pci_map_option_rom(PCIDevice *pdev, int region_num, pcibus_t addr, p cpu_register_physical_memory(addr, size, pdev->rom_offset); } +/* Patch the PCI vendor and device ids in a PCI rom image if necessary. + This is needed for an option rom which is used for more than one device. */ +static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, int size) +{ + uint16_t vendor_id; + uint16_t device_id; + uint16_t rom_vendor_id; + uint16_t rom_device_id; + uint16_t rom_magic; + uint16_t pcir_offset; + uint8_t checksum; + + /* Words in rom data are little endian (like in PCI configuration), + so they can be read / written with pci_get_word / pci_set_word. */ + + /* Only a valid rom will be patched. */ + rom_magic = pci_get_word(ptr); + if (rom_magic != 0xaa55) { + PCI_DPRINTF("Bad ROM magic %04x\n", rom_magic); + return; + } + pcir_offset = pci_get_word(ptr + 0x18); + if (pcir_offset + 8 >= size || memcmp(ptr + pcir_offset, "PCIR", 4)) { + PCI_DPRINTF("Bad PCIR offset 0x%x or signature\n", pcir_offset); + return; + } + + vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); + device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); + rom_vendor_id = pci_get_word(ptr + pcir_offset + 4); + rom_device_id = pci_get_word(ptr + pcir_offset + 6); + + PCI_DPRINTF("%s: ROM id %04x%04x / PCI id %04x%04x\n", pdev->romfile, + vendor_id, device_id, rom_vendor_id, rom_device_id); + + checksum = ptr[6]; + + if (vendor_id != rom_vendor_id) { + /* Patch vendor id and checksum (at offset 6 for etherboot roms). */ + checksum += (uint8_t)rom_vendor_id + (uint8_t)(rom_vendor_id >> 8); + checksum -= (uint8_t)vendor_id + (uint8_t)(vendor_id >> 8); + PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + ptr[6] = checksum; + pci_set_word(ptr + pcir_offset + 4, vendor_id); + } + + if (device_id != rom_device_id) { + /* Patch device id and checksum (at offset 6 for etherboot roms). */ + checksum += (uint8_t)rom_device_id + (uint8_t)(rom_device_id >> 8); + checksum -= (uint8_t)device_id + (uint8_t)(device_id >> 8); + PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + ptr[6] = checksum; + pci_set_word(ptr + pcir_offset + 6, device_id); + } +} + /* Add an option rom for the device */ -static int pci_add_option_rom(PCIDevice *pdev) +static int pci_add_option_rom(PCIDevice *pdev, bool is_default_rom) { int size; char *path; @@ -1715,6 +1842,11 @@ static int pci_add_option_rom(PCIDevice *pdev) load_image(path, ptr); qemu_free(path); + if (is_default_rom) { + /* Only the default rom images will be patched (if needed). */ + pci_patch_ids(pdev, ptr, size); + } + pci_register_bar(pdev, PCI_ROM_SLOT, size, 0, pci_map_option_rom); @@ -232,6 +232,7 @@ void pci_bus_hotplug(PCIBus *bus, pci_hotplug_fn hotplug, DeviceState *dev); PCIBus *pci_register_bus(DeviceState *parent, const char *name, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, void *irq_opaque, int devfn_min, int nirq); +void pci_bus_reset(PCIBus *bus); void pci_bus_set_mem_base(PCIBus *bus, target_phys_addr_t base); diff --git a/hw/pci_bridge.c b/hw/pci_bridge.c index 58cc2e4cee..464d89708f 100644 --- a/hw/pci_bridge.c +++ b/hw/pci_bridge.c @@ -139,6 +139,10 @@ pcibus_t pci_bridge_get_limit(const PCIDevice *bridge, uint8_t type) void pci_bridge_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { + PCIBridge *s = container_of(d, PCIBridge, dev); + uint16_t oldctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL); + uint16_t newctl; + pci_default_write_config(d, address, val, len); if (/* io base/limit */ @@ -147,9 +151,14 @@ void pci_bridge_write_config(PCIDevice *d, /* memory base/limit, prefetchable base/limit and io base/limit upper 16 */ ranges_overlap(address, len, PCI_MEMORY_BASE, 20)) { - PCIBridge *s = container_of(d, PCIBridge, dev); pci_bridge_update_mappings(&s->sec_bus); } + + newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL); + if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) { + /* Trigger hot reset on 0->1 transition. */ + pci_bus_reset(&s->sec_bus); + } } void pci_bridge_disable_base_limit(PCIDevice *dev) @@ -24,6 +24,7 @@ #include "hw.h" #include "pci_regs.h" #include "pcie_regs.h" +#include "pcie_aer.h" typedef enum { /* for attention and power indicator */ @@ -79,6 +80,19 @@ struct PCIExpressDevice { Software Notification of Hot-Plug Events, an interrupt is sent whenever the logical and of these conditions transitions from false to true. */ + + /* AER */ + uint16_t aer_cap; + PCIEAERLog aer_log; + unsigned int aer_intx; /* INTx for error reporting + * default is 0 = INTA# + * If the chip wants to use other interrupt + * line, initialize this member with the + * desired number. + * If the chip dynamically changes this member, + * also initialize it when loaded as + * appropreately. + */ }; /* PCI express capability helper functions */ diff --git a/hw/pcie_aer.c b/hw/pcie_aer.c new file mode 100644 index 0000000000..235ac534d7 --- /dev/null +++ b/hw/pcie_aer.c @@ -0,0 +1,815 @@ +/* + * pcie_aer.c + * + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "sysemu.h" +#include "pci_bridge.h" +#include "pcie.h" +#include "msix.h" +#include "msi.h" +#include "pci_internals.h" +#include "pcie_regs.h" + +//#define DEBUG_PCIE +#ifdef DEBUG_PCIE +# define PCIE_DPRINTF(fmt, ...) \ + fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__) +#else +# define PCIE_DPRINTF(fmt, ...) do {} while (0) +#endif +#define PCIE_DEV_PRINTF(dev, fmt, ...) \ + PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__) + +/* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */ +static uint32_t pcie_aer_uncor_default_severity(uint32_t status) +{ + switch (status) { + case PCI_ERR_UNC_INTN: + case PCI_ERR_UNC_DLP: + case PCI_ERR_UNC_SDN: + case PCI_ERR_UNC_RX_OVER: + case PCI_ERR_UNC_FCP: + case PCI_ERR_UNC_MALF_TLP: + return PCI_ERR_ROOT_CMD_FATAL_EN; + case PCI_ERR_UNC_POISON_TLP: + case PCI_ERR_UNC_ECRC: + case PCI_ERR_UNC_UNSUP: + case PCI_ERR_UNC_COMP_TIME: + case PCI_ERR_UNC_COMP_ABORT: + case PCI_ERR_UNC_UNX_COMP: + case PCI_ERR_UNC_ACSV: + case PCI_ERR_UNC_MCBTLP: + case PCI_ERR_UNC_ATOP_EBLOCKED: + case PCI_ERR_UNC_TLP_PRF_BLOCKED: + return PCI_ERR_ROOT_CMD_NONFATAL_EN; + default: + abort(); + break; + } + return PCI_ERR_ROOT_CMD_FATAL_EN; +} + +static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err) +{ + if (aer_log->log_num == aer_log->log_max) { + return -1; + } + memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err); + aer_log->log_num++; + return 0; +} + +static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err) +{ + assert(aer_log->log_num); + *err = aer_log->log[0]; + aer_log->log_num--; + memmove(&aer_log->log[0], &aer_log->log[1], + aer_log->log_num * sizeof *err); +} + +static void aer_log_clear_all_err(PCIEAERLog *aer_log) +{ + aer_log->log_num = 0; +} + +int pcie_aer_init(PCIDevice *dev, uint16_t offset) +{ + PCIExpressDevice *exp; + + pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER, + offset, PCI_ERR_SIZEOF); + exp = &dev->exp; + exp->aer_cap = offset; + + /* log_max is property */ + if (dev->exp.aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) { + dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT; + } + /* clip down the value to avoid unreasobale memory usage */ + if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) { + return -EINVAL; + } + dev->exp.aer_log.log = qemu_mallocz(sizeof dev->exp.aer_log.log[0] * + dev->exp.aer_log.log_max); + + pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, + PCI_ERR_UNC_SUPPORTED); + + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, + PCI_ERR_UNC_SEVERITY_DEFAULT); + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER, + PCI_ERR_UNC_SUPPORTED); + + pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS, + PCI_ERR_COR_STATUS); + + pci_set_long(dev->config + offset + PCI_ERR_COR_MASK, + PCI_ERR_COR_MASK_DEFAULT); + pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK, + PCI_ERR_COR_SUPPORTED); + + /* capabilities and control. multiple header logging is supported */ + if (dev->exp.aer_log.log_max > 0) { + pci_set_long(dev->config + offset + PCI_ERR_CAP, + PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC | + PCI_ERR_CAP_MHRC); + pci_set_long(dev->wmask + offset + PCI_ERR_CAP, + PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE | + PCI_ERR_CAP_MHRE); + } else { + pci_set_long(dev->config + offset + PCI_ERR_CAP, + PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC); + pci_set_long(dev->wmask + offset + PCI_ERR_CAP, + PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + } + + switch (pcie_cap_get_type(dev)) { + case PCI_EXP_TYPE_ROOT_PORT: + /* this case will be set by pcie_aer_root_init() */ + /* fallthrough */ + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_UPSTREAM: + pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL, + PCI_BRIDGE_CTL_SERR); + pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS, + PCI_SEC_STATUS_RCV_SYSTEM_ERROR); + break; + default: + /* nothing */ + break; + } + return 0; +} + +void pcie_aer_exit(PCIDevice *dev) +{ + qemu_free(dev->exp.aer_log.log); +} + +static void pcie_aer_update_uncor_status(PCIDevice *dev) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + PCIEAERLog *aer_log = &dev->exp.aer_log; + + uint16_t i; + for (i = 0; i < aer_log->log_num; i++) { + pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS, + dev->exp.aer_log.log[i].status); + } +} + +/* + * return value: + * true: error message needs to be sent up + * false: error message is masked + * + * 6.2.6 Error Message Control + * Figure 6-3 + * all pci express devices part + */ +static bool +pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg) +{ + if (!(pcie_aer_msg_is_uncor(msg) && + (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) { + return false; + } + + /* Signaled System Error + * + * 7.5.1.1 Command register + * Bit 8 SERR# Enable + * + * When Set, this bit enables reporting of Non-fatal and Fatal + * errors detected by the Function to the Root Complex. Note that + * errors are reported if enabled either through this bit or through + * the PCI Express specific bits in the Device Control register (see + * Section 7.8.4). + */ + pci_word_test_and_set_mask(dev->config + PCI_STATUS, + PCI_STATUS_SIG_SYSTEM_ERROR); + + if (!(msg->severity & + pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) { + return false; + } + + /* send up error message */ + return true; +} + +/* + * return value: + * true: error message is sent up + * false: error message is masked + * + * 6.2.6 Error Message Control + * Figure 6-3 + * virtual pci bridge part + */ +static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg) +{ + uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL); + + if (pcie_aer_msg_is_uncor(msg)) { + /* Received System Error */ + pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS, + PCI_SEC_STATUS_RCV_SYSTEM_ERROR); + } + + if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) { + return false; + } + return true; +} + +void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + assert(vector < PCI_ERR_ROOT_IRQ_MAX); + pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS, + PCI_ERR_ROOT_IRQ); + pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS, + vector << PCI_ERR_ROOT_IRQ_SHIFT); +} + +static unsigned int pcie_aer_root_get_vector(PCIDevice *dev) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); + return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT; +} + +/* + * return value: + * true: error message is sent up + * false: error message is masked + * + * 6.2.6 Error Message Control + * Figure 6-3 + * root port part + */ +static bool pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) +{ + bool msg_sent; + uint16_t cmd; + uint8_t *aer_cap; + uint32_t root_cmd; + uint32_t root_status; + bool msi_trigger; + + msg_sent = false; + cmd = pci_get_word(dev->config + PCI_COMMAND); + aer_cap = dev->config + dev->exp.aer_cap; + root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND); + root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); + msi_trigger = false; + + if (cmd & PCI_COMMAND_SERR) { + /* System Error. + * + * The way to report System Error is platform specific and + * it isn't implemented in qemu right now. + * So just discard the error for now. + * OS which cares of aer would receive errors via + * native aer mechanims, so this wouldn't matter. + */ + } + + /* Errro Message Received: Root Error Status register */ + switch (msg->severity) { + case PCI_ERR_ROOT_CMD_COR_EN: + if (root_status & PCI_ERR_ROOT_COR_RCV) { + root_status |= PCI_ERR_ROOT_MULTI_COR_RCV; + } else { + if (root_cmd & PCI_ERR_ROOT_CMD_COR_EN) { + msi_trigger = true; + } + pci_set_word(aer_cap + PCI_ERR_ROOT_COR_SRC, msg->source_id); + } + root_status |= PCI_ERR_ROOT_COR_RCV; + break; + case PCI_ERR_ROOT_CMD_NONFATAL_EN: + if (!(root_status & PCI_ERR_ROOT_NONFATAL_RCV) && + root_cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) { + msi_trigger = true; + } + root_status |= PCI_ERR_ROOT_NONFATAL_RCV; + break; + case PCI_ERR_ROOT_CMD_FATAL_EN: + if (!(root_status & PCI_ERR_ROOT_FATAL_RCV) && + root_cmd & PCI_ERR_ROOT_CMD_FATAL_EN) { + msi_trigger = true; + } + if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) { + root_status |= PCI_ERR_ROOT_FIRST_FATAL; + } + root_status |= PCI_ERR_ROOT_FATAL_RCV; + break; + default: + abort(); + break; + } + if (pcie_aer_msg_is_uncor(msg)) { + if (root_status & PCI_ERR_ROOT_UNCOR_RCV) { + root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV; + } else { + pci_set_word(aer_cap + PCI_ERR_ROOT_SRC, msg->source_id); + } + root_status |= PCI_ERR_ROOT_UNCOR_RCV; + } + pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status); + + if (root_cmd & msg->severity) { + /* 6.2.4.1.2 Interrupt Generation */ + if (pci_msi_enabled(dev)) { + if (msi_trigger) { + pci_msi_notify(dev, pcie_aer_root_get_vector(dev)); + } + } else { + qemu_set_irq(dev->irq[dev->exp.aer_intx], 1); + } + msg_sent = true; + } + return msg_sent; +} + +/* + * 6.2.6 Error Message Control Figure 6-3 + * + * Walk up the bus tree from the device, propagate the error message. + */ +static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg) +{ + uint8_t type; + + while (dev) { + if (!pci_is_express(dev)) { + /* just ignore it */ + /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR? + * Consider e.g. a PCI bridge above a PCI Express device. */ + return; + } + + type = pcie_cap_get_type(dev); + if ((type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_UPSTREAM || + type == PCI_EXP_TYPE_DOWNSTREAM) && + !pcie_aer_msg_vbridge(dev, msg)) { + return; + } + if (!pcie_aer_msg_alldev(dev, msg)) { + return; + } + if (type == PCI_EXP_TYPE_ROOT_PORT) { + pcie_aer_msg_root_port(dev, msg); + /* Root port can notify system itself, + or send the error message to root complex event collector. */ + /* + * if root port is associated with an event collector, + * return the root complex event collector here. + * For now root complex event collector isn't supported. + */ + return; + } + dev = pci_bridge_get_device(dev->bus); + } +} + +static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + uint8_t first_bit = ffsl(err->status) - 1; + uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP); + int i; + + assert(err->status); + assert(err->status & (err->status - 1)); + + errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP); + errcap |= PCI_ERR_CAP_FEP(first_bit); + + if (err->flags & PCIE_AER_ERR_HEADER_VALID) { + for (i = 0; i < ARRAY_SIZE(err->header); ++i) { + /* 7.10.8 Header Log Register */ + uint8_t *header_log = + aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0]; + cpu_to_be32wu((uint32_t*)header_log, err->header[i]); + } + } else { + assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT)); + memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE); + } + + if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) && + (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL2) & + PCI_EXP_DEVCAP2_EETLPP)) { + for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) { + /* 7.10.12 tlp prefix log register */ + uint8_t *prefix_log = + aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0]; + cpu_to_be32wu((uint32_t*)prefix_log, err->prefix[i]); + } + errcap |= PCI_ERR_CAP_TLP; + } else { + memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, + PCI_ERR_TLP_PREFIX_LOG_SIZE); + } + pci_set_long(aer_cap + PCI_ERR_CAP, errcap); +} + +static void pcie_aer_clear_log(PCIDevice *dev) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + + pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP, + PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP); + + memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE); + memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE); +} + +static void pcie_aer_clear_error(PCIDevice *dev) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP); + PCIEAERLog *aer_log = &dev->exp.aer_log; + PCIEAERErr err; + + if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) { + pcie_aer_clear_log(dev); + return; + } + + /* + * If more errors are queued, set corresponding bits in uncorrectable + * error status. + * We emulate uncorrectable error status register as W1CS. + * So set bit in uncorrectable error status here again for multiple + * error recording support. + * + * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability) + */ + pcie_aer_update_uncor_status(dev); + + aer_log_del_err(aer_log, &err); + pcie_aer_update_log(dev, &err); +} + +static int pcie_aer_record_error(PCIDevice *dev, + const PCIEAERErr *err) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP); + int fep = PCI_ERR_CAP_FEP(errcap); + + assert(err->status); + assert(err->status & (err->status - 1)); + + if (errcap & PCI_ERR_CAP_MHRE && + (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) { + /* Not first error. queue error */ + if (aer_log_add_err(&dev->exp.aer_log, err) < 0) { + /* overflow */ + return -1; + } + return 0; + } + + pcie_aer_update_log(dev, err); + return 0; +} + +typedef struct PCIEAERInject { + PCIDevice *dev; + uint8_t *aer_cap; + const PCIEAERErr *err; + uint16_t devctl; + uint16_t devsta; + uint32_t error_status; + bool unsupported_request; + bool log_overflow; + PCIEAERMsg msg; +} PCIEAERInject; + +static bool pcie_aer_inject_cor_error(PCIEAERInject *inj, + uint32_t uncor_status, + bool is_advisory_nonfatal) +{ + PCIDevice *dev = inj->dev; + + inj->devsta |= PCI_EXP_DEVSTA_CED; + if (inj->unsupported_request) { + inj->devsta |= PCI_EXP_DEVSTA_URD; + } + pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta); + + if (inj->aer_cap) { + uint32_t mask; + pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS, + inj->error_status); + mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK); + if (mask & inj->error_status) { + return false; + } + if (is_advisory_nonfatal) { + uint32_t uncor_mask = + pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK); + if (!(uncor_mask & uncor_status)) { + inj->log_overflow = !!pcie_aer_record_error(dev, inj->err); + } + pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS, + uncor_status); + } + } + + if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) { + return false; + } + if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) { + return false; + } + + inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN; + return true; +} + +static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal) +{ + PCIDevice *dev = inj->dev; + uint16_t cmd; + + if (is_fatal) { + inj->devsta |= PCI_EXP_DEVSTA_FED; + } else { + inj->devsta |= PCI_EXP_DEVSTA_NFED; + } + if (inj->unsupported_request) { + inj->devsta |= PCI_EXP_DEVSTA_URD; + } + pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta); + + if (inj->aer_cap) { + uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK); + if (mask & inj->error_status) { + pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS, + inj->error_status); + return false; + } + + inj->log_overflow = !!pcie_aer_record_error(dev, inj->err); + pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS, + inj->error_status); + } + + cmd = pci_get_word(dev->config + PCI_COMMAND); + if (inj->unsupported_request && + !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) { + return false; + } + if (is_fatal) { + if (!((cmd & PCI_COMMAND_SERR) || + (inj->devctl & PCI_EXP_DEVCTL_FERE))) { + return false; + } + inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN; + } else { + if (!((cmd & PCI_COMMAND_SERR) || + (inj->devctl & PCI_EXP_DEVCTL_NFERE))) { + return false; + } + inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN; + } + return true; +} + +/* + * non-Function specific error must be recorded in all functions. + * It is the responsibility of the caller of this function. + * It is also caller's responsiblity to determine which function should + * report the rerror. + * + * 6.2.4 Error Logging + * 6.2.5 Sqeunce of Device Error Signaling and Logging Operations + * table 6-2: Flowchard Showing Sequence of Device Error Signaling and Logging + * Operations + */ +int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err) +{ + uint8_t *aer_cap = NULL; + uint16_t devctl = 0; + uint16_t devsta = 0; + uint32_t error_status = err->status; + PCIEAERInject inj; + + if (!pci_is_express(dev)) { + return -ENOSYS; + } + + if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) { + error_status &= PCI_ERR_COR_SUPPORTED; + } else { + error_status &= PCI_ERR_UNC_SUPPORTED; + } + + /* invalid status bit. one and only one bit must be set */ + if (!error_status || (error_status & (error_status - 1))) { + return -EINVAL; + } + + if (dev->exp.aer_cap) { + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + aer_cap = dev->config + dev->exp.aer_cap; + devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL); + devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA); + } + + inj.dev = dev; + inj.aer_cap = aer_cap; + inj.err = err; + inj.devctl = devctl; + inj.devsta = devsta; + inj.error_status = error_status; + inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) && + err->status == PCI_ERR_UNC_UNSUP; + inj.log_overflow = false; + + if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) { + if (!pcie_aer_inject_cor_error(&inj, 0, false)) { + return 0; + } + } else { + bool is_fatal = + pcie_aer_uncor_default_severity(error_status) == + PCI_ERR_ROOT_CMD_FATAL_EN; + if (aer_cap) { + is_fatal = + error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER); + } + if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) { + inj.error_status = PCI_ERR_COR_ADV_NONFATAL; + if (!pcie_aer_inject_cor_error(&inj, error_status, true)) { + return 0; + } + } else { + if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) { + return 0; + } + } + } + + /* send up error message */ + inj.msg.source_id = err->source_id; + pcie_aer_msg(dev, &inj.msg); + + if (inj.log_overflow) { + PCIEAERErr header_log_overflow = { + .status = PCI_ERR_COR_HL_OVERFLOW, + .flags = PCIE_AER_ERR_IS_CORRECTABLE, + }; + int ret = pcie_aer_inject_error(dev, &header_log_overflow); + assert(!ret); + } + return 0; +} + +void pcie_aer_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP); + uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap); + uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS); + + /* uncorrectable error */ + if (!(uncorsta & first_error)) { + /* the bit that corresponds to the first error is cleared */ + pcie_aer_clear_error(dev); + } else if (errcap & PCI_ERR_CAP_MHRE) { + /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared + * nothing should happen. So we have to revert the modification to + * the register. + */ + pcie_aer_update_uncor_status(dev); + } else { + /* capability & control + * PCI_ERR_CAP_MHRE might be cleared, so clear of header log. + */ + aer_log_clear_all_err(&dev->exp.aer_log); + } +} + +void pcie_aer_root_init(PCIDevice *dev) +{ + uint16_t pos = dev->exp.aer_cap; + + pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND, + PCI_ERR_ROOT_CMD_EN_MASK); + pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS, + PCI_ERR_ROOT_STATUS_REPORT_MASK); +} + +void pcie_aer_root_reset(PCIDevice *dev) +{ + uint8_t* aer_cap = dev->config + dev->exp.aer_cap; + + pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0); + + /* + * Advanced Error Interrupt Message Number in Root Error Status Register + * must be updated by chip dependent code because it's chip dependent + * which number is used. + */ +} + +static bool pcie_aer_root_does_trigger(uint32_t cmd, uint32_t status) +{ + return + ((cmd & PCI_ERR_ROOT_CMD_COR_EN) && (status & PCI_ERR_ROOT_COR_RCV)) || + ((cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) && + (status & PCI_ERR_ROOT_NONFATAL_RCV)) || + ((cmd & PCI_ERR_ROOT_CMD_FATAL_EN) && + (status & PCI_ERR_ROOT_FATAL_RCV)); +} + +void pcie_aer_root_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len, + uint32_t root_cmd_prev) +{ + uint8_t *aer_cap = dev->config + dev->exp.aer_cap; + + /* root command register */ + uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND); + if (root_cmd & PCI_ERR_ROOT_CMD_EN_MASK) { + /* 6.2.4.1.2 Interrupt Generation */ + + /* 0 -> 1 */ + uint32_t root_cmd_set = (root_cmd_prev ^ root_cmd) & root_cmd; + uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS); + + if (pci_msi_enabled(dev)) { + if (pcie_aer_root_does_trigger(root_cmd_set, root_status)) { + pci_msi_notify(dev, pcie_aer_root_get_vector(dev)); + } + } else { + int int_level = pcie_aer_root_does_trigger(root_cmd, root_status); + qemu_set_irq(dev->irq[dev->exp.aer_intx], int_level); + } + } +} + +static const VMStateDescription vmstate_pcie_aer_err = { + .name = "PCIE_AER_ERROR", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(status, PCIEAERErr), + VMSTATE_UINT16(source_id, PCIEAERErr), + VMSTATE_UINT16(flags, PCIEAERErr), + VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4), + VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_PCIE_AER_ERRS(_field, _state, _field_num, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .version_id = 0, \ + .num_offset = vmstate_offset_value(_state, _field_num, uint16_t), \ + .size = sizeof(_type), \ + .vmsd = &(_vmsd), \ + .flags = VMS_POINTER | VMS_VARRAY_UINT16 | VMS_STRUCT, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +const VMStateDescription vmstate_pcie_aer_log = { + .name = "PCIE_AER_ERROR_LOG", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT16(log_num, PCIEAERLog), + VMSTATE_UINT16(log_max, PCIEAERLog), + VMSTATE_PCIE_AER_ERRS(log, PCIEAERLog, log_num, + vmstate_pcie_aer_err, PCIEAERErr), + VMSTATE_END_OF_LIST() + } +}; diff --git a/hw/pcie_aer.h b/hw/pcie_aer.h new file mode 100644 index 0000000000..7539500cd8 --- /dev/null +++ b/hw/pcie_aer.h @@ -0,0 +1,106 @@ +/* + * pcie_aer.h + * + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef QEMU_PCIE_AER_H +#define QEMU_PCIE_AER_H + +#include "hw.h" + +/* definitions which PCIExpressDevice uses */ + +/* AER log */ +struct PCIEAERLog { + /* This structure is saved/loaded. + So explicitly size them instead of unsigned int */ + + /* the number of currently recorded log in log member */ + uint16_t log_num; + + /* + * The maximum number of the log. Errors can be logged up to this. + * + * This is configurable property. + * The specified value will be clipped down to PCIE_AER_LOG_MAX_LIMIT + * to avoid unreasonable memory usage. + * I bet that 128 log size would be big enough, otherwise too many errors + * for system to function normaly. But could consecutive errors occur? + */ +#define PCIE_AER_LOG_MAX_DEFAULT 8 +#define PCIE_AER_LOG_MAX_LIMIT 128 +#define PCIE_AER_LOG_MAX_UNSET 0xffff + uint16_t log_max; + + /* Error log. log_max-sized array */ + PCIEAERErr *log; +}; + +/* aer error message: error signaling message has only error sevirity and + source id. See 2.2.8.3 error signaling messages */ +struct PCIEAERMsg { + /* + * PCI_ERR_ROOT_CMD_{COR, NONFATAL, FATAL}_EN + * = PCI_EXP_DEVCTL_{CERE, NFERE, FERE} + */ + uint32_t severity; + + uint16_t source_id; /* bdf */ +}; + +static inline bool +pcie_aer_msg_is_uncor(const PCIEAERMsg *msg) +{ + return msg->severity == PCI_ERR_ROOT_CMD_NONFATAL_EN || + msg->severity == PCI_ERR_ROOT_CMD_FATAL_EN; +} + +/* error */ +struct PCIEAERErr { + uint32_t status; /* error status bits */ + uint16_t source_id; /* bdf */ + +#define PCIE_AER_ERR_IS_CORRECTABLE 0x1 /* correctable/uncorrectable */ +#define PCIE_AER_ERR_MAYBE_ADVISORY 0x2 /* maybe advisory non-fatal */ +#define PCIE_AER_ERR_HEADER_VALID 0x4 /* TLP header is logged */ +#define PCIE_AER_ERR_TLP_PREFIX_PRESENT 0x8 /* TLP Prefix is logged */ + uint16_t flags; + + uint32_t header[4]; /* TLP header */ + uint32_t prefix[4]; /* TLP header prefix */ +}; + +extern const VMStateDescription vmstate_pcie_aer_log; + +int pcie_aer_init(PCIDevice *dev, uint16_t offset); +void pcie_aer_exit(PCIDevice *dev); +void pcie_aer_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len); + +/* aer root port */ +void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector); +void pcie_aer_root_init(PCIDevice *dev); +void pcie_aer_root_reset(PCIDevice *dev); +void pcie_aer_root_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len, + uint32_t root_cmd_prev); + +/* error injection */ +int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err); + +#endif /* QEMU_PCIE_AER_H */ diff --git a/hw/pcie_port.c b/hw/pcie_port.c index 117de6186e..340dcdb3c4 100644 --- a/hw/pcie_port.c +++ b/hw/pcie_port.c @@ -27,6 +27,14 @@ void pcie_port_init_reg(PCIDevice *d) pci_set_word(d->config + PCI_STATUS, 0); pci_set_word(d->config + PCI_SEC_STATUS, 0); + /* Unlike conventional pci bridge, some bits are hardwared to 0. */ + pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, + PCI_BRIDGE_CTL_PARITY | + PCI_BRIDGE_CTL_ISA | + PCI_BRIDGE_CTL_VGA | + PCI_BRIDGE_CTL_SERR | + PCI_BRIDGE_CTL_BUS_RESET); + /* 7.5.3.5 Prefetchable Memory Base Limit * The Prefetchable Memory Base and Prefetchable Memory Limit registers * must indicate that 64-bit addresses are supported, as defined in diff --git a/hw/pcie_regs.h b/hw/pcie_regs.h index 3461a1b6b8..4d123d9fcc 100644 --- a/hw/pcie_regs.h +++ b/hw/pcie_regs.h @@ -94,7 +94,9 @@ #define PCI_ERR_CAP_MHRE 0x00000400 #define PCI_ERR_CAP_TLP 0x00000800 +#define PCI_ERR_HEADER_LOG_SIZE 16 #define PCI_ERR_TLP_PREFIX_LOG 0x38 +#define PCI_ERR_TLP_PREFIX_LOG_SIZE 16 #define PCI_SEC_STATUS_RCV_SYSTEM_ERROR 0x4000 @@ -257,13 +257,6 @@ DeviceState *qdev_device_add(QemuOpts *opts) return qdev; } -static void qdev_reset(void *opaque) -{ - DeviceState *dev = opaque; - if (dev->info->reset) - dev->info->reset(dev); -} - /* Initialize a device. Device properties should be set before calling this function. IRQs and MMIO regions should be connected/mapped after calling this function. @@ -279,7 +272,6 @@ int qdev_init(DeviceState *dev) qdev_free(dev); return rc; } - qemu_register_reset(qdev_reset, dev); if (dev->info->vmsd) { vmstate_register_with_alias_id(dev, -1, dev->info->vmsd, dev, dev->instance_id_alias, @@ -308,6 +300,38 @@ int qdev_unplug(DeviceState *dev) return dev->info->unplug(dev); } +static int qdev_reset_one(DeviceState *dev, void *opaque) +{ + if (dev->info->reset) { + dev->info->reset(dev); + } + + return 0; +} + +BusState *sysbus_get_default(void) +{ + return main_system_bus; +} + +static int qbus_reset_one(BusState *bus, void *opaque) +{ + if (bus->info->reset) { + return bus->info->reset(bus); + } + return 0; +} + +void qdev_reset_all(DeviceState *dev) +{ + qdev_walk_children(dev, qdev_reset_one, qbus_reset_one, NULL); +} + +void qbus_reset_all(BusState *bus) +{ + qbus_walk_children(bus, qdev_reset_one, qbus_reset_one, NULL); +} + /* can be used as ->unplug() callback for the simple cases */ int qdev_simple_unplug_cb(DeviceState *dev) { @@ -351,7 +375,6 @@ void qdev_free(DeviceState *dev) if (dev->opts) qemu_opts_del(dev->opts); } - qemu_unregister_reset(qdev_reset, dev); QLIST_REMOVE(dev, sibling); for (prop = dev->info->props; prop && prop->name; prop++) { if (prop->info->free) { @@ -449,6 +472,52 @@ BusState *qdev_get_child_bus(DeviceState *dev, const char *name) return NULL; } +int qbus_walk_children(BusState *bus, qdev_walkerfn *devfn, + qbus_walkerfn *busfn, void *opaque) +{ + DeviceState *dev; + int err; + + if (busfn) { + err = busfn(bus, opaque); + if (err) { + return err; + } + } + + QLIST_FOREACH(dev, &bus->children, sibling) { + err = qdev_walk_children(dev, devfn, busfn, opaque); + if (err < 0) { + return err; + } + } + + return 0; +} + +int qdev_walk_children(DeviceState *dev, qdev_walkerfn *devfn, + qbus_walkerfn *busfn, void *opaque) +{ + BusState *bus; + int err; + + if (devfn) { + err = devfn(dev, opaque); + if (err) { + return err; + } + } + + QLIST_FOREACH(bus, &dev->child_bus, sibling) { + err = qbus_walk_children(bus, devfn, busfn, opaque); + if (err < 0) { + return err; + } + } + + return 0; +} + static BusState *qbus_find_recursive(BusState *bus, const char *name, const BusInfo *info) { @@ -49,12 +49,14 @@ struct DeviceState { typedef void (*bus_dev_printfn)(Monitor *mon, DeviceState *dev, int indent); typedef char *(*bus_get_dev_path)(DeviceState *dev); +typedef int (qbus_resetfn)(BusState *bus); struct BusInfo { const char *name; size_t size; bus_dev_printfn print_dev; bus_get_dev_path get_dev_path; + qbus_resetfn *reset; Property *props; }; @@ -173,13 +175,29 @@ BusState *qdev_get_parent_bus(DeviceState *dev); /*** BUS API. ***/ +/* Returns 0 to walk children, > 0 to skip walk, < 0 to terminate walk. */ +typedef int (qbus_walkerfn)(BusState *bus, void *opaque); +typedef int (qdev_walkerfn)(DeviceState *dev, void *opaque); + void qbus_create_inplace(BusState *bus, BusInfo *info, DeviceState *parent, const char *name); BusState *qbus_create(BusInfo *info, DeviceState *parent, const char *name); +/* Returns > 0 if either devfn or busfn skip walk somewhere in cursion, + * < 0 if either devfn or busfn terminate walk somewhere in cursion, + * 0 otherwise. */ +int qbus_walk_children(BusState *bus, qdev_walkerfn *devfn, + qbus_walkerfn *busfn, void *opaque); +int qdev_walk_children(DeviceState *dev, qdev_walkerfn *devfn, + qbus_walkerfn *busfn, void *opaque); +void qdev_reset_all(DeviceState *dev); +void qbus_reset_all(BusState *bus); void qbus_free(BusState *bus); #define FROM_QBUS(type, dev) DO_UPCAST(type, qbus, dev) +/* This should go away once we get rid of the NULL bus hack */ +BusState *sysbus_get_default(void); + /*** monitor commands ***/ void do_info_qtree(Monitor *mon); diff --git a/hw/usb-ohci.c b/hw/usb-ohci.c index c60fd8deef..8fb2f83f0f 100644 --- a/hw/usb-ohci.c +++ b/hw/usb-ohci.c @@ -1741,7 +1741,7 @@ static int usb_ohci_initfn_pci(struct PCIDevice *dev) ohci->state.irq = ohci->pci_dev.irq[0]; /* TODO: avoid cast below by using dev */ - pci_register_bar((struct PCIDevice *)ohci, 0, 256, + pci_register_bar(&ohci->pci_dev, 0, 256, PCI_BASE_ADDRESS_SPACE_MEMORY, ohci_mapfunc); return 0; } diff --git a/hw/vhost.c b/hw/vhost.c index 8586f66bac..6082da287e 100644 --- a/hw/vhost.c +++ b/hw/vhost.c @@ -37,6 +37,7 @@ static void vhost_dev_sync_region(struct vhost_dev *dev, /* We first check with non-atomic: much cheaper, * and we expect non-dirty to be the common case. */ if (!*from) { + addr += VHOST_LOG_CHUNK; continue; } /* Data must be read atomically. We don't really diff --git a/hw/virtio.c b/hw/virtio.c index 849a60faaa..07dbf868fd 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -682,7 +682,6 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) uint32_t features; uint32_t supported_features = vdev->binding->get_features(vdev->binding_opaque); - uint16_t num_heads; if (vdev->binding->load_config) { ret = vdev->binding->load_config(vdev->binding_opaque, f); @@ -713,17 +712,23 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); if (vdev->vq[i].pa) { + uint16_t nheads; virtqueue_init(&vdev->vq[i]); - } - num_heads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; - /* Check it isn't doing very strange things with descriptor numbers. */ - if (num_heads > vdev->vq[i].vring.num) { - error_report("VQ %d size 0x%x Guest index 0x%x " - "inconsistent with Host index 0x%x: delta 0x%x", - i, vdev->vq[i].vring.num, - vring_avail_idx(&vdev->vq[i]), - vdev->vq[i].last_avail_idx, num_heads); - return -1; + nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; + /* Check it isn't doing very strange things with descriptor numbers. */ + if (nheads > vdev->vq[i].vring.num) { + error_report("VQ %d size 0x%x Guest index 0x%x " + "inconsistent with Host index 0x%x: delta 0x%x\n", + i, vdev->vq[i].vring.num, + vring_avail_idx(&vdev->vq[i]), + vdev->vq[i].last_avail_idx, nheads); + return -1; + } + } else if (vdev->vq[i].last_avail_idx) { + error_report("VQ %d address 0x0 " + "inconsistent with Host index 0x%x\n", + i, vdev->vq[i].last_avail_idx); + return -1; } if (vdev->binding->load_queue) { ret = vdev->binding->load_queue(vdev->binding_opaque, i, f); diff --git a/hw/xio3130_downstream.c b/hw/xio3130_downstream.c index 854eba8931..1a2d258bd2 100644 --- a/hw/xio3130_downstream.c +++ b/hw/xio3130_downstream.c @@ -42,7 +42,7 @@ static void xio3130_downstream_write_config(PCIDevice *d, uint32_t address, pcie_cap_flr_write_config(d, address, val, len); pcie_cap_slot_write_config(d, address, val, len); msi_write_config(d, address, val, len); - /* TODO: AER */ + pcie_aer_write_config(d, address, val, len); } static void xio3130_downstream_reset(DeviceState *qdev) @@ -61,6 +61,7 @@ static int xio3130_downstream_initfn(PCIDevice *d) PCIEPort *p = DO_UPCAST(PCIEPort, br, br); PCIESlot *s = DO_UPCAST(PCIESlot, port, p); int rc; + int tmp; rc = pci_bridge_initfn(d); if (rc < 0) { @@ -76,17 +77,17 @@ static int xio3130_downstream_initfn(PCIDevice *d) XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT, XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT); if (rc < 0) { - return rc; + goto err_bridge; } rc = pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET, XIO3130_SSVID_SVID, XIO3130_SSVID_SSID); if (rc < 0) { - return rc; + goto err_bridge; } rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_DOWNSTREAM, p->port); if (rc < 0) { - return rc; + goto err_msi; } pcie_cap_flr_init(d); /* TODO: implement FLR */ pcie_cap_deverr_init(d); @@ -94,19 +95,38 @@ static int xio3130_downstream_initfn(PCIDevice *d) pcie_chassis_create(s->chassis); rc = pcie_chassis_add_slot(s); if (rc < 0) { - return rc; + goto err_pcie_cap; } pcie_cap_ari_init(d); - /* TODO: AER */ + rc = pcie_aer_init(d, XIO3130_AER_OFFSET); + if (rc < 0) { + goto err; + } return 0; + +err: + pcie_chassis_del_slot(s); +err_pcie_cap: + pcie_cap_exit(d); +err_msi: + msi_uninit(d); +err_bridge: + tmp = pci_bridge_exitfn(d); + assert(!tmp); + return rc; } static int xio3130_downstream_exitfn(PCIDevice *d) { - /* TODO: AER */ - msi_uninit(d); + PCIBridge* br = DO_UPCAST(PCIBridge, dev, d); + PCIEPort *p = DO_UPCAST(PCIEPort, br, br); + PCIESlot *s = DO_UPCAST(PCIESlot, port, p); + + pcie_aer_exit(d); + pcie_chassis_del_slot(s); pcie_cap_exit(d); + msi_uninit(d); return pci_bridge_exitfn(d); } @@ -144,7 +164,8 @@ static const VMStateDescription vmstate_xio3130_downstream = { .post_load = pcie_cap_slot_post_load, .fields = (VMStateField[]) { VMSTATE_PCIE_DEVICE(port.br.dev, PCIESlot), - /* TODO: AER */ + VMSTATE_STRUCT(port.br.dev.exp.aer_log, PCIESlot, 0, + vmstate_pcie_aer_log, PCIEAERLog), VMSTATE_END_OF_LIST() } }; @@ -166,7 +187,9 @@ static PCIDeviceInfo xio3130_downstream_info = { DEFINE_PROP_UINT8("port", PCIESlot, port.port, 0), DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), - /* TODO: AER */ + DEFINE_PROP_UINT16("aer_log_max", PCIESlot, + port.br.dev.exp.aer_log.log_max, + PCIE_AER_LOG_MAX_DEFAULT), DEFINE_PROP_END_OF_LIST(), } }; diff --git a/hw/xio3130_upstream.c b/hw/xio3130_upstream.c index d9d637fdae..387bf6c77e 100644 --- a/hw/xio3130_upstream.c +++ b/hw/xio3130_upstream.c @@ -41,7 +41,7 @@ static void xio3130_upstream_write_config(PCIDevice *d, uint32_t address, pci_bridge_write_config(d, address, val, len); pcie_cap_flr_write_config(d, address, val, len); msi_write_config(d, address, val, len); - /* TODO: AER */ + pcie_aer_write_config(d, address, val, len); } static void xio3130_upstream_reset(DeviceState *qdev) @@ -57,6 +57,7 @@ static int xio3130_upstream_initfn(PCIDevice *d) PCIBridge* br = DO_UPCAST(PCIBridge, dev, d); PCIEPort *p = DO_UPCAST(PCIEPort, br, br); int rc; + int tmp; rc = pci_bridge_initfn(d); if (rc < 0) { @@ -72,33 +73,45 @@ static int xio3130_upstream_initfn(PCIDevice *d) XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT, XIO3130_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT); if (rc < 0) { - return rc; + goto err_bridge; } rc = pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET, XIO3130_SSVID_SVID, XIO3130_SSVID_SSID); if (rc < 0) { - return rc; + goto err_bridge; } rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_UPSTREAM, p->port); if (rc < 0) { - return rc; + goto err_msi; } /* TODO: implement FLR */ pcie_cap_flr_init(d); pcie_cap_deverr_init(d); - /* TODO: AER */ + rc = pcie_aer_init(d, XIO3130_AER_OFFSET); + if (rc < 0) { + goto err; + } return 0; + +err: + pcie_cap_exit(d); +err_msi: + msi_uninit(d); +err_bridge: + tmp = pci_bridge_exitfn(d); + assert(!tmp); + return rc; } static int xio3130_upstream_exitfn(PCIDevice *d) { - /* TODO: AER */ - msi_uninit(d); + pcie_aer_exit(d); pcie_cap_exit(d); + msi_uninit(d); return pci_bridge_exitfn(d); } @@ -131,7 +144,8 @@ static const VMStateDescription vmstate_xio3130_upstream = { .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_PCIE_DEVICE(br.dev, PCIEPort), - /* TODO: AER */ + VMSTATE_STRUCT(br.dev.exp.aer_log, PCIEPort, 0, vmstate_pcie_aer_log, + PCIEAERLog), VMSTATE_END_OF_LIST() } }; @@ -151,7 +165,8 @@ static PCIDeviceInfo xio3130_upstream_info = { .qdev.props = (Property[]) { DEFINE_PROP_UINT8("port", PCIEPort, port, 0), - /* TODO: AER */ + DEFINE_PROP_UINT16("aer_log_max", PCIEPort, br.dev.exp.aer_log.log_max, + PCIE_AER_LOG_MAX_DEFAULT), DEFINE_PROP_END_OF_LIST(), } }; @@ -162,7 +162,7 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot) mem.slot = slot->slot; mem.guest_phys_addr = slot->start_addr; mem.memory_size = slot->memory_size; - mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset); + mem.userspace_addr = (unsigned long)qemu_safe_ram_ptr(slot->phys_offset); mem.flags = slot->flags; if (s->migration_log) { mem.flags |= KVM_MEM_LOG_DIRTY_PAGES; diff --git a/migration.c b/migration.c index 9ee8b179c0..622a9d2d95 100644 --- a/migration.c +++ b/migration.c @@ -32,7 +32,7 @@ #endif /* Migration speed throttling */ -static uint32_t max_throttle = (32 << 20); +static int64_t max_throttle = (32 << 20); static MigrationState *current_migration; @@ -136,7 +136,9 @@ int do_migrate_set_speed(Monitor *mon, const QDict *qdict, QObject **ret_data) FdMigrationState *s; d = qdict_get_int(qdict, "value"); - d = MAX(0, MIN(UINT32_MAX, d)); + if (d < 0) { + d = 0; + } max_throttle = d; s = migrate_to_fms(current_migration); diff --git a/pc-bios/README b/pc-bios/README index 3172cf7896..4b019e08a1 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -16,7 +16,7 @@ - The PXE roms come from Rom-o-Matic gPXE 0.9.9 with BANNER_TIMEOUT=0 e1000 8086:100E - eepro100 8086:1209, 8086:1229 + eepro100 8086:1209 (also used for 8086:1229 and 8086:2449) ns8390 1050:0940 pcnet32 1022:2000 rtl8139 10ec:8139 diff --git a/pc-bios/gpxe-eepro100-80861229.rom b/pc-bios/gpxe-eepro100-80861229.rom Binary files differdeleted file mode 100644 index 9cf397e3de..0000000000 --- a/pc-bios/gpxe-eepro100-80861229.rom +++ /dev/null diff --git a/qemu-common.h b/qemu-common.h index b3957f1859..de82c2ea13 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -240,6 +240,9 @@ typedef struct PCIBus PCIBus; typedef struct PCIDevice PCIDevice; typedef struct PCIExpressDevice PCIExpressDevice; typedef struct PCIBridge PCIBridge; +typedef struct PCIEAERMsg PCIEAERMsg; +typedef struct PCIEAERLog PCIEAERLog; +typedef struct PCIEAERErr PCIEAERErr; typedef struct PCIEPort PCIEPort; typedef struct PCIESlot PCIESlot; typedef struct SerialState SerialState; @@ -611,7 +611,7 @@ int qemu_file_rate_limit(QEMUFile *f) return 0; } -size_t qemu_file_get_rate_limit(QEMUFile *f) +int64_t qemu_file_get_rate_limit(QEMUFile *f) { if (f->get_rate_limit) return f->get_rate_limit(f->opaque); @@ -619,7 +619,7 @@ size_t qemu_file_get_rate_limit(QEMUFile *f) return 0; } -size_t qemu_file_set_rate_limit(QEMUFile *f, size_t new_rate) +int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate) { /* any failed or completed migration keeps its state to allow probing of * migration data, but has no associated file anymore */ @@ -2980,6 +2980,7 @@ int main(int argc, char **argv, char **envp) exit(1); } + qemu_register_reset((void *)qbus_reset_all, sysbus_get_default()); qemu_system_reset(); if (loadvm) { if (load_vmstate(loadvm) < 0) { |