diff options
32 files changed, 920 insertions, 108 deletions
diff --git a/docs/pci_expander_bridge.txt b/docs/pci_expander_bridge.txt new file mode 100644 index 0000000000..d7913fb4ae --- /dev/null +++ b/docs/pci_expander_bridge.txt @@ -0,0 +1,58 @@ +PCI EXPANDER BRIDGE (PXB) +========================= + +Description +=========== +PXB is a "light-weight" host bridge in the same PCI domain +as the main host bridge whose purpose is to enable +the main host bridge to support multiple PCI root buses. +It is implemented only for i440fx and can be placed only +on bus 0 (pci.0). + +As opposed to PCI-2-PCI bridge's secondary bus, PXB's bus +is a primary bus and can be associated with a NUMA node +(different from the main host bridge) allowing the guest OS +to recognize the proximity of a pass-through device to +other resources as RAM and CPUs. + +Usage +===== +A detailed command line would be: + +[qemu-bin + storage options] +-m 2G +-object memory-backend-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 -numa node,nodeid=0,cpus=0,memdev=ram-node0 +-object memory-backend-ram,size=1024M,policy=bind,host-nodes=1,id=ram-node1 -numa node,nodeid=1,cpus=1,memdev=ram-node1 +-device pxb,id=bridge1,bus=pci.0,numa_node=1,bus_nr=4 -netdev user,id=nd-device e1000,bus=bridge1,addr=0x4,netdev=nd +-device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8,bus=pci.0 -device e1000,bus=bridge2,addr=0x3 +-device pxb,id=bridge3,bus=pci.0,bus_nr=40,bus=pci.0 -drive if=none,id=drive0,file=[img] -device virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1 + +Here you have: + - 2 NUMA nodes for the guest, 0 and 1. (both mapped to the same NUMA node in host, but you can and should put it in different host NUMA nodes) + - a pxb host bridge attached to NUMA 1 with an e1000 behind it + - a pxb host bridge attached to NUMA 0 with an e1000 behind it + - a pxb host bridge not attached to any NUMA with a hard drive behind it. + +Limitations +=========== +Please observe that we specified the bus "pci.0" for the second and third pxb. +This is because when no bus is given, another pxb can be selected by QEMU as default bus, +however, PXBs can be placed only under the root bus. + +Implementation +============== +The PXB is composed by: +- HostBridge (TYPE_PXB_HOST) + The host bridge allows to register and query the PXB's rPCI root bus in QEMU. +- PXBDev(TYPE_PXB_DEVICE) + It is a regular PCI Device that resides on the piix host-bridge bus and its bus uses the same PCI domain. + However, the bus behind is exposed through ACPI as a primary PCI bus and starts a new PCI hierarchy. + The interrupts from devices behind the PXB are routed through this device the same as if it were a + PCI-2-PCI bridge. The _PRT follows the i440fx model. +- PCIBridgeDev(TYPE_PCI_BRIDGE_DEV) + Created automatically as part of init sequence. + When adding a device to PXB it is attached to the bridge for two reasons: + - Using the bridge will enable hotplug support + - All the devices behind the bridge will use bridge's IO/MEM windows compacting + the PCI address space. + diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 2bebf23db2..0d4b3247b7 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -687,6 +687,14 @@ Aml *aml_else(void) return var; } +/* ACPI 1.0b: 16.2.5.3 Type 1 Opcodes Encoding: DefWhile */ +Aml *aml_while(Aml *predicate) +{ + Aml *var = aml_bundle(0xA2 /* WhileOp */, AML_PACKAGE); + aml_append(var, predicate); + return var; +} + /* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefMethod */ Aml *aml_method(const char *name, int arg_count) { diff --git a/hw/acpi/core.c b/hw/acpi/core.c index 51913d6932..0f201d8c6d 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -22,6 +22,7 @@ #include "hw/hw.h" #include "hw/i386/pc.h" #include "hw/acpi/acpi.h" +#include "hw/nvram/fw_cfg.h" #include "qemu/config-file.h" #include "qapi/opts-visitor.h" #include "qapi/dealloc-visitor.h" @@ -592,14 +593,26 @@ static const MemoryRegionOps acpi_pm_cnt_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, uint8_t s4_val) +void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, + bool disable_s3, bool disable_s4, uint8_t s4_val) { + FWCfgState *fw_cfg; + ar->pm1.cnt.s4_val = s4_val; ar->wakeup.notify = acpi_notify_wakeup; qemu_register_wakeup_notifier(&ar->wakeup); memory_region_init_io(&ar->pm1.cnt.io, memory_region_owner(parent), &acpi_pm_cnt_ops, ar, "acpi-cnt", 2); memory_region_add_subregion(parent, 4, &ar->pm1.cnt.io); + + fw_cfg = fw_cfg_find(); + if (fw_cfg) { + uint8_t suspend[6] = {128, 0, 0, 129, 128, 128}; + suspend[3] = 1 | ((!disable_s3) << 7); + suspend[4] = s4_val | ((!disable_s4) << 7); + + fw_cfg_add_file(fw_cfg, "etc/system-states", g_memdup(suspend, 6), 6); + } } void acpi_pm1_cnt_reset(ACPIREGS *ar) @@ -666,6 +679,13 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr) return val; } +void acpi_send_gpe_event(ACPIREGS *ar, qemu_irq irq, + AcpiGPEStatusBits status) +{ + ar->gpe.sts[0] |= status; + acpi_update_sci(ar, irq); +} + void acpi_update_sci(ACPIREGS *regs, qemu_irq irq) { int sci_level, pm1a_sts; diff --git a/hw/acpi/cpu_hotplug.c b/hw/acpi/cpu_hotplug.c index b8ebfadc30..f5b9972f2d 100644 --- a/hw/acpi/cpu_hotplug.c +++ b/hw/acpi/cpu_hotplug.c @@ -59,8 +59,7 @@ void acpi_cpu_plug_cb(ACPIREGS *ar, qemu_irq irq, return; } - ar->gpe.sts[0] |= ACPI_CPU_HOTPLUG_STATUS; - acpi_update_sci(ar, irq); + acpi_send_gpe_event(ar, irq, ACPI_CPU_HOTPLUG_STATUS); } void acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 84e5bb8d39..799351ea44 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -219,7 +219,8 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, acpi_pm_tmr_init(&pm->acpi_regs, ich9_pm_update_sci_fn, &pm->io); acpi_pm1_evt_init(&pm->acpi_regs, ich9_pm_update_sci_fn, &pm->io); - acpi_pm1_cnt_init(&pm->acpi_regs, &pm->io, pm->s4_val); + acpi_pm1_cnt_init(&pm->acpi_regs, &pm->io, pm->disable_s3, pm->disable_s4, + pm->s4_val); acpi_gpe_init(&pm->acpi_regs, ICH9_PMIO_GPE0_LEN); memory_region_init_io(&pm->io_gpe, OBJECT(lpc_pci), &ich9_gpe_ops, pm, diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c index 34cef1e5c3..2ff0d5ce1b 100644 --- a/hw/acpi/memory_hotplug.c +++ b/hw/acpi/memory_hotplug.c @@ -241,8 +241,7 @@ void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st, mdev->is_inserting = true; /* do ACPI magic */ - ar->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS; - acpi_update_sci(ar, irq); + acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS); return; } @@ -260,8 +259,7 @@ void acpi_memory_unplug_request_cb(ACPIREGS *ar, qemu_irq irq, mdev->is_removing = true; /* Do ACPI magic */ - ar->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS; - acpi_update_sci(ar, irq); + acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS); } void acpi_memory_unplug_cb(MemHotplugState *mem_st, diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 1e11af906d..fbbc4dde4f 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -45,7 +45,6 @@ # define ACPI_PCIHP_DPRINTF(format, ...) do { } while (0) #endif -#define ACPI_PCI_HOTPLUG_STATUS 2 #define ACPI_PCIHP_ADDR 0xae00 #define ACPI_PCIHP_SIZE 0x0014 #define ACPI_PCIHP_LEGACY_SIZE 0x000f @@ -202,8 +201,7 @@ void acpi_pcihp_device_plug_cb(ACPIREGS *ar, qemu_irq irq, AcpiPciHpState *s, s->acpi_pcihp_pci_status[bsel].up |= (1U << slot); - ar->gpe.sts[0] |= ACPI_PCI_HOTPLUG_STATUS; - acpi_update_sci(ar, irq); + acpi_send_gpe_event(ar, irq, ACPI_PCI_HOTPLUG_STATUS); } void acpi_pcihp_device_unplug_cb(ACPIREGS *ar, qemu_irq irq, AcpiPciHpState *s, @@ -220,8 +218,7 @@ void acpi_pcihp_device_unplug_cb(ACPIREGS *ar, qemu_irq irq, AcpiPciHpState *s, s->acpi_pcihp_pci_status[bsel].down |= (1U << slot); - ar->gpe.sts[0] |= ACPI_PCI_HOTPLUG_STATUS; - acpi_update_sci(ar, irq); + acpi_send_gpe_event(ar, irq, ACPI_PCI_HOTPLUG_STATUS); } static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 1b28481bbd..b730ca6ced 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -475,7 +475,7 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp) acpi_pm_tmr_init(&s->ar, pm_tmr_timer, &s->io); acpi_pm1_evt_init(&s->ar, pm_tmr_timer, &s->io); - acpi_pm1_cnt_init(&s->ar, &s->io, s->s4_val); + acpi_pm1_cnt_init(&s->ar, &s->io, s->disable_s3, s->disable_s4, s->s4_val); acpi_gpe_init(&s->ar, GPE_LEN); s->powerdown_notifier.notify = piix4_pm_powerdown_req; @@ -503,8 +503,7 @@ Object *piix4_pm_find(void) I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qemu_irq sci_irq, qemu_irq smi_irq, - int kvm_enabled, FWCfgState *fw_cfg, - DeviceState **piix4_pm) + int kvm_enabled, DeviceState **piix4_pm) { DeviceState *dev; PIIX4PMState *s; @@ -525,14 +524,6 @@ I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qdev_init_nofail(dev); - if (fw_cfg) { - uint8_t suspend[6] = {128, 0, 0, 129, 128, 128}; - suspend[3] = 1 | ((!s->disable_s3) << 7); - suspend[4] = s->s4_val | ((!s->disable_s4) << 7); - - fw_cfg_add_file(fw_cfg, "etc/system-states", g_memdup(suspend, 6), 6); - } - return s->smb.smbus; } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 15fd4c551e..5593e41f84 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -240,13 +240,32 @@ static void acpi_get_misc_info(AcpiMiscInfo *info) info->applesmc_io_base = applesmc_port(); } +/* + * Because of the PXB hosts we cannot simply query TYPE_PCI_HOST_BRIDGE. + * On i386 arch we only have two pci hosts, so we can look only for them. + */ +static Object *acpi_get_i386_pci_host(void) +{ + PCIHostState *host; + + host = OBJECT_CHECK(PCIHostState, + object_resolve_path("/machine/i440fx", NULL), + TYPE_PCI_HOST_BRIDGE); + if (!host) { + host = OBJECT_CHECK(PCIHostState, + object_resolve_path("/machine/q35", NULL), + TYPE_PCI_HOST_BRIDGE); + } + + return OBJECT(host); +} + static void acpi_get_pci_info(PcPciInfo *info) { Object *pci_host; - bool ambiguous; - pci_host = object_resolve_path_type("", TYPE_PCI_HOST_BRIDGE, &ambiguous); - g_assert(!ambiguous); + + pci_host = acpi_get_i386_pci_host(); g_assert(pci_host); info->w32.begin = object_property_get_int(pci_host, @@ -599,6 +618,290 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, qobject_decref(bsel); } +/* + * initialize_route - Initialize the interrupt routing rule + * through a specific LINK: + * if (lnk_idx == idx) + * route using link 'link_name' + */ +static Aml *initialize_route(Aml *route, const char *link_name, + Aml *lnk_idx, int idx) +{ + Aml *if_ctx = aml_if(aml_equal(lnk_idx, aml_int(idx))); + Aml *pkg = aml_package(4); + + aml_append(pkg, aml_int(0)); + aml_append(pkg, aml_int(0)); + aml_append(pkg, aml_name("%s", link_name)); + aml_append(pkg, aml_int(0)); + aml_append(if_ctx, aml_store(pkg, route)); + + return if_ctx; +} + +/* + * build_prt - Define interrupt rounting rules + * + * Returns an array of 128 routes, one for each device, + * based on device location. + * The main goal is to equaly distribute the interrupts + * over the 4 existing ACPI links (works only for i440fx). + * The hash function is (slot + pin) & 3 -> "LNK[D|A|B|C]". + * + */ +static Aml *build_prt(void) +{ + Aml *method, *while_ctx, *pin, *res; + + method = aml_method("_PRT", 0); + res = aml_local(0); + pin = aml_local(1); + aml_append(method, aml_store(aml_package(128), res)); + aml_append(method, aml_store(aml_int(0), pin)); + + /* while (pin < 128) */ + while_ctx = aml_while(aml_lless(pin, aml_int(128))); + { + Aml *slot = aml_local(2); + Aml *lnk_idx = aml_local(3); + Aml *route = aml_local(4); + + /* slot = pin >> 2 */ + aml_append(while_ctx, + aml_store(aml_shiftright(pin, aml_int(2)), slot)); + /* lnk_idx = (slot + pin) & 3 */ + aml_append(while_ctx, + aml_store(aml_and(aml_add(pin, slot), aml_int(3)), lnk_idx)); + + /* route[2] = "LNK[D|A|B|C]", selection based on pin % 3 */ + aml_append(while_ctx, initialize_route(route, "LNKD", lnk_idx, 0)); + aml_append(while_ctx, initialize_route(route, "LNKA", lnk_idx, 1)); + aml_append(while_ctx, initialize_route(route, "LNKB", lnk_idx, 2)); + aml_append(while_ctx, initialize_route(route, "LNKC", lnk_idx, 3)); + + /* route[0] = 0x[slot]FFFF */ + aml_append(while_ctx, + aml_store(aml_or(aml_shiftleft(slot, aml_int(16)), aml_int(0xFFFF)), + aml_index(route, aml_int(0)))); + /* route[1] = pin & 3 */ + aml_append(while_ctx, + aml_store(aml_and(pin, aml_int(3)), aml_index(route, aml_int(1)))); + /* res[pin] = route */ + aml_append(while_ctx, aml_store(route, aml_index(res, pin))); + /* pin++ */ + aml_append(while_ctx, aml_increment(pin)); + } + aml_append(method, while_ctx); + /* return res*/ + aml_append(method, aml_return(res)); + + return method; +} + +typedef struct CrsRangeEntry { + uint64_t base; + uint64_t limit; +} CrsRangeEntry; + +static void crs_range_insert(GPtrArray *ranges, uint64_t base, uint64_t limit) +{ + CrsRangeEntry *entry; + + entry = g_malloc(sizeof(*entry)); + entry->base = base; + entry->limit = limit; + + g_ptr_array_add(ranges, entry); +} + +static void crs_range_free(gpointer data) +{ + CrsRangeEntry *entry = (CrsRangeEntry *)data; + g_free(entry); +} + +static gint crs_range_compare(gconstpointer a, gconstpointer b) +{ + CrsRangeEntry *entry_a = *(CrsRangeEntry **)a; + CrsRangeEntry *entry_b = *(CrsRangeEntry **)b; + + return (int64_t)entry_a->base - (int64_t)entry_b->base; +} + +/* + * crs_replace_with_free_ranges - given the 'used' ranges within [start - end] + * interval, computes the 'free' ranges from the same interval. + * Example: If the input array is { [a1 - a2],[b1 - b2] }, the function + * will return { [base - a1], [a2 - b1], [b2 - limit] }. + */ +static void crs_replace_with_free_ranges(GPtrArray *ranges, + uint64_t start, uint64_t end) +{ + GPtrArray *free_ranges = g_ptr_array_new_with_free_func(crs_range_free); + uint64_t free_base = start; + int i; + + g_ptr_array_sort(ranges, crs_range_compare); + for (i = 0; i < ranges->len; i++) { + CrsRangeEntry *used = g_ptr_array_index(ranges, i); + + if (free_base < used->base) { + crs_range_insert(free_ranges, free_base, used->base - 1); + } + + free_base = used->limit + 1; + } + + if (free_base < end) { + crs_range_insert(free_ranges, free_base, end); + } + + g_ptr_array_set_size(ranges, 0); + for (i = 0; i < free_ranges->len; i++) { + g_ptr_array_add(ranges, g_ptr_array_index(free_ranges, i)); + } + + g_ptr_array_free(free_ranges, false); +} + +static Aml *build_crs(PCIHostState *host, + GPtrArray *io_ranges, GPtrArray *mem_ranges) +{ + Aml *crs = aml_resource_template(); + uint8_t max_bus = pci_bus_num(host->bus); + uint8_t type; + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(host->bus->devices); devfn++) { + int i; + uint64_t range_base, range_limit; + PCIDevice *dev = host->bus->devices[devfn]; + + if (!dev) { + continue; + } + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + PCIIORegion *r = &dev->io_regions[i]; + + range_base = r->addr; + range_limit = r->addr + r->size - 1; + + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (!range_base || range_base > range_limit) { + continue; + } + + if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { + aml_append(crs, + aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, + AML_POS_DECODE, AML_ENTIRE_RANGE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(io_ranges, range_base, range_limit); + } else { /* "memory" */ + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(mem_ranges, range_base, range_limit); + } + } + + type = dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION; + if (type == PCI_HEADER_TYPE_BRIDGE) { + uint8_t subordinate = dev->config[PCI_SUBORDINATE_BUS]; + if (subordinate > max_bus) { + max_bus = subordinate; + } + + range_base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO); + range_limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO); + + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (range_base || range_base > range_limit) { + aml_append(crs, + aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, + AML_POS_DECODE, AML_ENTIRE_RANGE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(io_ranges, range_base, range_limit); + } + + range_base = + pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); + range_limit = + pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); + + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (range_base || range_base > range_limit) { + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(mem_ranges, range_base, range_limit); + } + + range_base = + pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + range_limit = + pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (range_base || range_base > range_limit) { + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(mem_ranges, range_base, range_limit); + } + } + } + + aml_append(crs, + aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, + 0, + pci_bus_num(host->bus), + max_bus, + 0, + max_bus - pci_bus_num(host->bus) + 1)); + + return crs; +} + static void build_ssdt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu, AcpiPmInfo *pm, AcpiMiscInfo *misc, @@ -608,6 +911,11 @@ build_ssdt(GArray *table_data, GArray *linker, uint32_t nr_mem = machine->ram_slots; unsigned acpi_cpus = guest_info->apic_id_limit; Aml *ssdt, *sb_scope, *scope, *pkg, *dev, *method, *crs, *field, *ifctx; + PCIBus *bus = NULL; + GPtrArray *io_ranges = g_ptr_array_new_with_free_func(crs_range_free); + GPtrArray *mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); + CrsRangeEntry *entry; + int root_bus_limit = 0xFF; int i; ssdt = init_aml_allocator(); @@ -619,31 +927,81 @@ build_ssdt(GArray *table_data, GArray *linker, /* Reserve space for header */ acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader)); + /* Extra PCI root buses are implemented only for i440fx */ + bus = find_i440fx(); + if (bus) { + QLIST_FOREACH(bus, &bus->child, sibling) { + uint8_t bus_num = pci_bus_num(bus); + uint8_t numa_node = pci_bus_numa_node(bus); + + /* look only for expander root buses */ + if (!pci_bus_is_root(bus)) { + continue; + } + + if (bus_num < root_bus_limit) { + root_bus_limit = bus_num - 1; + } + + scope = aml_scope("\\_SB"); + dev = aml_device("PC%.02X", bus_num); + aml_append(dev, + aml_name_decl("_UID", aml_string("PC%.02X", bus_num))); + aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A03"))); + aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); + + if (numa_node != NUMA_NODE_UNASSIGNED) { + aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node))); + } + + aml_append(dev, build_prt()); + crs = build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent), + io_ranges, mem_ranges); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + aml_append(ssdt, scope); + } + } + scope = aml_scope("\\_SB.PCI0"); /* build PCI0._CRS */ crs = aml_resource_template(); aml_append(crs, aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, - 0x0000, 0x0000, 0x00FF, 0x0000, 0x0100)); + 0x0000, 0x0, root_bus_limit, + 0x0000, root_bus_limit + 1)); aml_append(crs, aml_io(AML_DECODE16, 0x0CF8, 0x0CF8, 0x01, 0x08)); aml_append(crs, aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, AML_ENTIRE_RANGE, 0x0000, 0x0000, 0x0CF7, 0x0000, 0x0CF8)); - aml_append(crs, - aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, - AML_POS_DECODE, AML_ENTIRE_RANGE, - 0x0000, 0x0D00, 0xFFFF, 0x0000, 0xF300)); + + crs_replace_with_free_ranges(io_ranges, 0x0D00, 0xFFFF); + for (i = 0; i < io_ranges->len; i++) { + entry = g_ptr_array_index(io_ranges, i); + aml_append(crs, + aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, + AML_POS_DECODE, AML_ENTIRE_RANGE, + 0x0000, entry->base, entry->limit, + 0x0000, entry->limit - entry->base + 1)); + } + aml_append(crs, aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, AML_CACHEABLE, AML_READ_WRITE, 0, 0x000A0000, 0x000BFFFF, 0, 0x00020000)); - aml_append(crs, - aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, - AML_NON_CACHEABLE, AML_READ_WRITE, - 0, pci->w32.begin, pci->w32.end - 1, 0, - pci->w32.end - pci->w32.begin)); + + crs_replace_with_free_ranges(mem_ranges, pci->w32.begin, pci->w32.end - 1); + for (i = 0; i < mem_ranges->len; i++) { + entry = g_ptr_array_index(mem_ranges, i); + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, + AML_NON_CACHEABLE, AML_READ_WRITE, + 0, entry->base, entry->limit, + 0, entry->limit - entry->base + 1)); + } + if (pci->w64.begin) { aml_append(crs, aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, @@ -666,6 +1024,9 @@ build_ssdt(GArray *table_data, GArray *linker, aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); + g_ptr_array_free(io_ranges, true); + g_ptr_array_free(mem_ranges, true); + /* reserve PCIHP resources */ if (pm->pcihp_io_len) { dev = aml_device("PHPR"); @@ -958,10 +1319,9 @@ build_ssdt(GArray *table_data, GArray *linker, { Object *pci_host; PCIBus *bus = NULL; - bool ambiguous; - pci_host = object_resolve_path_type("", TYPE_PCI_HOST_BRIDGE, &ambiguous); - if (!ambiguous && pci_host) { + pci_host = acpi_get_i386_pci_host(); + if (pci_host) { bus = PCI_HOST_BRIDGE(pci_host)->bus; } @@ -1273,10 +1633,8 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) { Object *pci_host; QObject *o; - bool ambiguous; - pci_host = object_resolve_path_type("", TYPE_PCI_HOST_BRIDGE, &ambiguous); - g_assert(!ambiguous); + pci_host = acpi_get_i386_pci_host(); g_assert(pci_host); o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_BASE, NULL); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 973ee6ba8b..2baff4a660 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -30,6 +30,7 @@ #include "hw/block/fdc.h" #include "hw/ide.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" #include "monitor/monitor.h" #include "hw/nvram/fw_cfg.h" #include "hw/timer/hpet.h" @@ -1126,6 +1127,25 @@ void pc_guest_info_machine_done(Notifier *notifier, void *data) PcGuestInfoState *guest_info_state = container_of(notifier, PcGuestInfoState, machine_done); + PCIBus *bus = find_i440fx(); + + if (bus) { + int extra_hosts = 0; + + QLIST_FOREACH(bus, &bus->child, sibling) { + /* look for expander root buses */ + if (pci_bus_is_root(bus)) { + extra_hosts++; + } + } + if (extra_hosts && guest_info_state->info.fw_cfg) { + uint64_t *val = g_malloc(sizeof(*val)); + *val = cpu_to_le64(extra_hosts); + fw_cfg_add_file(guest_info_state->info.fw_cfg, + "etc/extra-pci-roots", val, sizeof(*val)); + } + } + acpi_setup(&guest_info_state->info); } diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 768b09be3b..5253e6d4fa 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -98,7 +98,6 @@ static void pc_init1(MachineState *machine) MemoryRegion *pci_memory; MemoryRegion *rom_memory; DeviceState *icc_bridge; - FWCfgState *fw_cfg = NULL; PcGuestInfo *guest_info; ram_addr_t lowmem; @@ -179,16 +178,16 @@ static void pc_init1(MachineState *machine) /* allocate ram and load rom/bios */ if (!xen_enabled()) { - fw_cfg = pc_memory_init(machine, system_memory, - below_4g_mem_size, above_4g_mem_size, - rom_memory, &ram_memory, guest_info); + pc_memory_init(machine, system_memory, + below_4g_mem_size, above_4g_mem_size, + rom_memory, &ram_memory, guest_info); } else if (machine->kernel_filename != NULL) { /* For xen HVM direct kernel boot, load linux here */ - fw_cfg = xen_load_linux(machine->kernel_filename, - machine->kernel_cmdline, - machine->initrd_filename, - below_4g_mem_size, - guest_info); + xen_load_linux(machine->kernel_filename, + machine->kernel_cmdline, + machine->initrd_filename, + below_4g_mem_size, + guest_info); } gsi_state = g_malloc0(sizeof(*gsi_state)); @@ -287,7 +286,7 @@ static void pc_init1(MachineState *machine) /* TODO: Populate SPD eeprom data. */ smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, gsi[9], smi_irq, - kvm_enabled(), fw_cfg, &piix4_pm); + kvm_enabled(), &piix4_pm); smbus_eeprom_init(smbus, 8, NULL, 0); object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 144b210081..18718d772e 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -491,7 +491,7 @@ static void ich9_lpc_machine_ready(Notifier *n, void *opaque) /* lpt */ pci_conf[0x82] |= 0x04; } - if (memory_region_present(io_as, 0x3f0)) { + if (memory_region_present(io_as, 0x3f2)) { /* floppy */ pci_conf[0x82] |= 0x08; } diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c index b8197b115d..b2ba870427 100644 --- a/hw/isa/vt82c686.c +++ b/hw/isa/vt82c686.c @@ -356,7 +356,7 @@ static void vt82c686b_pm_realize(PCIDevice *dev, Error **errp) acpi_pm_tmr_init(&s->ar, pm_tmr_timer, &s->io); acpi_pm1_evt_init(&s->ar, pm_tmr_timer, &s->io); - acpi_pm1_cnt_init(&s->ar, &s->io, 2); + acpi_pm1_cnt_init(&s->ar, &s->io, false, false, 2); } I2CBus *vt82c686b_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 39f0c97c0c..e70633d29f 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -211,7 +211,6 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_end = address_space_start + address_space_size; g_assert(QEMU_ALIGN_UP(address_space_start, align) == address_space_start); - g_assert(QEMU_ALIGN_UP(address_space_size, align) == address_space_size); if (!address_space_size) { error_setg(errp, "memory hotplug is not enabled, " diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c index 482250d85d..5140882c00 100644 --- a/hw/mips/mips_malta.c +++ b/hw/mips/mips_malta.c @@ -1161,7 +1161,7 @@ void mips_malta_init(MachineState *machine) pci_piix4_ide_init(pci_bus, hd, piix4_devfn + 1); pci_create_simple(pci_bus, piix4_devfn + 2, "piix4-usb-uhci"); smbus = piix4_pm_init(pci_bus, piix4_devfn + 3, 0x1100, - isa_get_irq(NULL, 9), NULL, 0, NULL, NULL); + isa_get_irq(NULL, 9), NULL, 0, NULL); smbus_eeprom_init(smbus, 8, smbus_eeprom_buf, smbus_eeprom_size); g_free(smbus_eeprom_buf); pit = pit_init(isa_bus, 0x40, 0, NULL); diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 012ab7fae8..0d3bf0fb89 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -511,7 +511,7 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) return virtio_net_guest_offloads_by_features(vdev->guest_features); } -static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features) +static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) { VirtIONet *n = VIRTIO_NET(vdev); int i; diff --git a/hw/pci-bridge/Makefile.objs b/hw/pci-bridge/Makefile.objs index 96c596eb31..f2adfe348c 100644 --- a/hw/pci-bridge/Makefile.objs +++ b/hw/pci-bridge/Makefile.objs @@ -1,4 +1,5 @@ common-obj-y += pci_bridge_dev.o +common-obj-y += pci_expander_bridge.o common-obj-$(CONFIG_XIO3130) += xio3130_upstream.o xio3130_downstream.o common-obj-$(CONFIG_IOH3420) += ioh3420.o common-obj-$(CONFIG_I82801B11) += i82801b11.o diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c new file mode 100644 index 0000000000..ec2bb458f7 --- /dev/null +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -0,0 +1,231 @@ +/* + * PCI Expander Bridge Device Emulation + * + * Copyright (C) 2015 Red Hat Inc + * + * Authors: + * Marcel Apfelbaum <marcel@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_host.h" +#include "hw/pci/pci_bus.h" +#include "hw/i386/pc.h" +#include "qemu/range.h" +#include "qemu/error-report.h" +#include "sysemu/numa.h" + +#define TYPE_PXB_BUS "pxb-bus" +#define PXB_BUS(obj) OBJECT_CHECK(PXBBus, (obj), TYPE_PXB_BUS) + +typedef struct PXBBus { + /*< private >*/ + PCIBus parent_obj; + /*< public >*/ + + char bus_path[8]; +} PXBBus; + +#define TYPE_PXB_DEVICE "pxb" +#define PXB_DEV(obj) OBJECT_CHECK(PXBDev, (obj), TYPE_PXB_DEVICE) + +typedef struct PXBDev { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + uint8_t bus_nr; + uint16_t numa_node; +} PXBDev; + +#define TYPE_PXB_HOST "pxb-host" + +static int pxb_bus_num(PCIBus *bus) +{ + PXBDev *pxb = PXB_DEV(bus->parent_dev); + + return pxb->bus_nr; +} + +static bool pxb_is_root(PCIBus *bus) +{ + return true; /* by definition */ +} + +static uint16_t pxb_bus_numa_node(PCIBus *bus) +{ + PXBDev *pxb = PXB_DEV(bus->parent_dev); + + return pxb->numa_node; +} + +static void pxb_bus_class_init(ObjectClass *class, void *data) +{ + PCIBusClass *pbc = PCI_BUS_CLASS(class); + + pbc->bus_num = pxb_bus_num; + pbc->is_root = pxb_is_root; + pbc->numa_node = pxb_bus_numa_node; +} + +static const TypeInfo pxb_bus_info = { + .name = TYPE_PXB_BUS, + .parent = TYPE_PCI_BUS, + .instance_size = sizeof(PXBBus), + .class_init = pxb_bus_class_init, +}; + +static const char *pxb_host_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +{ + PXBBus *bus = PXB_BUS(rootbus); + + snprintf(bus->bus_path, 8, "0000:%02x", pxb_bus_num(rootbus)); + return bus->bus_path; +} + +static void pxb_host_class_init(ObjectClass *class, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(class); + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(class); + + dc->fw_name = "pci"; + hc->root_bus_path = pxb_host_root_bus_path; +} + +static const TypeInfo pxb_host_info = { + .name = TYPE_PXB_HOST, + .parent = TYPE_PCI_HOST_BRIDGE, + .class_init = pxb_host_class_init, +}; + +/* + * Registers the PXB bus as a child of the i440fx root bus. + * + * Returns 0 on successs, -1 if i440fx host was not + * found or the bus number is already in use. + */ +static int pxb_register_bus(PCIDevice *dev, PCIBus *pxb_bus) +{ + PCIBus *bus = dev->bus; + int pxb_bus_num = pci_bus_num(pxb_bus); + + if (bus->parent_dev) { + error_report("PXB devices can be attached only to root bus."); + return -1; + } + + QLIST_FOREACH(bus, &bus->child, sibling) { + if (pci_bus_num(bus) == pxb_bus_num) { + error_report("Bus %d is already in use.", pxb_bus_num); + return -1; + } + } + QLIST_INSERT_HEAD(&dev->bus->child, pxb_bus, sibling); + + return 0; +} + +static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin) +{ + PCIDevice *pxb = pci_dev->bus->parent_dev; + + /* + * The bios does not index the pxb slot number when + * it computes the IRQ because it resides on bus 0 + * and not on the current bus. + * However QEMU routes the irq through bus 0 and adds + * the pxb slot to the IRQ computation of the PXB + * device. + * + * Synchronize between bios and QEMU by canceling + * pxb's effect. + */ + return pin - PCI_SLOT(pxb->devfn); +} + +static int pxb_dev_initfn(PCIDevice *dev) +{ + PXBDev *pxb = PXB_DEV(dev); + DeviceState *ds, *bds; + PCIBus *bus; + const char *dev_name = NULL; + + if (pxb->numa_node != NUMA_NODE_UNASSIGNED && + pxb->numa_node >= nb_numa_nodes) { + error_report("Illegal numa node %d.", pxb->numa_node); + return -EINVAL; + } + + if (dev->qdev.id && *dev->qdev.id) { + dev_name = dev->qdev.id; + } + + ds = qdev_create(NULL, TYPE_PXB_HOST); + bus = pci_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS); + + bus->parent_dev = dev; + bus->address_space_mem = dev->bus->address_space_mem; + bus->address_space_io = dev->bus->address_space_io; + bus->map_irq = pxb_map_irq_fn; + + bds = qdev_create(BUS(bus), "pci-bridge"); + bds->id = dev_name; + qdev_prop_set_uint8(bds, "chassis_nr", pxb->bus_nr); + + PCI_HOST_BRIDGE(ds)->bus = bus; + + if (pxb_register_bus(dev, bus)) { + return -EINVAL; + } + + qdev_init_nofail(ds); + qdev_init_nofail(bds); + + pci_word_test_and_set_mask(dev->config + PCI_STATUS, + PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK); + pci_config_set_class(dev->config, PCI_CLASS_BRIDGE_HOST); + + return 0; +} + +static Property pxb_dev_properties[] = { + /* Note: 0 is not a legal a PXB bus number. */ + DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0), + DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pxb_dev_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->init = pxb_dev_initfn; + k->vendor_id = PCI_VENDOR_ID_REDHAT; + k->device_id = PCI_DEVICE_ID_REDHAT_PXB; + k->class_id = PCI_CLASS_BRIDGE_HOST; + + dc->desc = "PCI Expander Bridge"; + dc->props = pxb_dev_properties; +} + +static const TypeInfo pxb_dev_info = { + .name = TYPE_PXB_DEVICE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PXBDev), + .class_init = pxb_dev_class_init, +}; + +static void pxb_register_types(void) +{ + type_register_static(&pxb_bus_info); + type_register_static(&pxb_host_info); + type_register_static(&pxb_dev_info); +} + +type_init(pxb_register_types) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 3423c3a1eb..750f3dacea 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -88,9 +88,28 @@ static void pci_bus_unrealize(BusState *qbus, Error **errp) vmstate_unregister(NULL, &vmstate_pcibus, bus); } +static bool pcibus_is_root(PCIBus *bus) +{ + return !bus->parent_dev; +} + +static int pcibus_num(PCIBus *bus) +{ + if (pcibus_is_root(bus)) { + return 0; /* pci host bridge */ + } + return bus->parent_dev->config[PCI_SECONDARY_BUS]; +} + +static uint16_t pcibus_numa_node(PCIBus *bus) +{ + return NUMA_NODE_UNASSIGNED; +} + static void pci_bus_class_init(ObjectClass *klass, void *data) { BusClass *k = BUS_CLASS(klass); + PCIBusClass *pbc = PCI_BUS_CLASS(klass); k->print_dev = pcibus_dev_print; k->get_dev_path = pcibus_get_dev_path; @@ -98,12 +117,17 @@ static void pci_bus_class_init(ObjectClass *klass, void *data) k->realize = pci_bus_realize; k->unrealize = pci_bus_unrealize; k->reset = pcibus_reset; + + pbc->is_root = pcibus_is_root; + pbc->bus_num = pcibus_num; + pbc->numa_node = pcibus_numa_node; } static const TypeInfo pci_bus_info = { .name = TYPE_PCI_BUS, .parent = TYPE_BUS, .instance_size = sizeof(PCIBus), + .class_size = sizeof(PCIBusClass), .class_init = pci_bus_class_init, }; @@ -278,7 +302,10 @@ PCIBus *pci_device_root_bus(const PCIDevice *d) { PCIBus *bus = d->bus; - while ((d = bus->parent_dev) != NULL) { + while (!pci_bus_is_root(bus)) { + d = bus->parent_dev; + assert(d != NULL); + bus = d->bus; } @@ -291,7 +318,6 @@ const char *pci_root_bus_path(PCIDevice *dev) PCIHostState *host_bridge = PCI_HOST_BRIDGE(rootbus->qbus.parent); PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_GET_CLASS(host_bridge); - assert(!rootbus->parent_dev); assert(host_bridge->bus == rootbus); if (hc->root_bus_path) { @@ -325,7 +351,7 @@ bool pci_bus_is_express(PCIBus *bus) bool pci_bus_is_root(PCIBus *bus) { - return !bus->parent_dev; + return PCI_BUS_GET_CLASS(bus)->is_root(bus); } void pci_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent, @@ -379,9 +405,12 @@ PCIBus *pci_register_bus(DeviceState *parent, const char *name, int pci_bus_num(PCIBus *s) { - if (pci_bus_is_root(s)) - return 0; /* pci host bridge */ - return s->parent_dev->config[PCI_SECONDARY_BUS]; + return PCI_BUS_GET_CLASS(s)->bus_num(s); +} + +int pci_bus_numa_node(PCIBus *bus) +{ + return PCI_BUS_GET_CLASS(bus)->numa_node(bus); } static int get_pci_config_device(QEMUFile *f, void *pv, size_t size) @@ -1576,7 +1605,8 @@ PciInfoList *qmp_query_pci(Error **errp) QLIST_FOREACH(host_bridge, &pci_host_bridges, next) { info = g_malloc0(sizeof(*info)); - info->value = qmp_query_pci_bus(host_bridge->bus, 0); + info->value = qmp_query_pci_bus(host_bridge->bus, + pci_bus_num(host_bridge->bus)); /* XXX: waiting for the qapi to support GSList */ if (!cur_item) { @@ -1681,10 +1711,28 @@ static bool pci_secondary_bus_in_range(PCIDevice *dev, int bus_num) { return !(pci_get_word(dev->config + PCI_BRIDGE_CONTROL) & PCI_BRIDGE_CTL_BUS_RESET) /* Don't walk the bus if it's reset. */ && - dev->config[PCI_SECONDARY_BUS] < bus_num && + dev->config[PCI_SECONDARY_BUS] <= bus_num && bus_num <= dev->config[PCI_SUBORDINATE_BUS]; } +/* Whether a given bus number is in a range of a root bus */ +static bool pci_root_bus_in_range(PCIBus *bus, int bus_num) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { + PCIDevice *dev = bus->devices[i]; + + if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) { + if (pci_secondary_bus_in_range(dev, bus_num)) { + return true; + } + } + } + + return false; +} + static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) { PCIBus *sec; @@ -1706,12 +1754,18 @@ static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) /* try child bus */ for (; bus; bus = sec) { QLIST_FOREACH(sec, &bus->child, sibling) { - assert(!pci_bus_is_root(sec)); - if (sec->parent_dev->config[PCI_SECONDARY_BUS] == bus_num) { + if (pci_bus_num(sec) == bus_num) { return sec; } - if (pci_secondary_bus_in_range(sec->parent_dev, bus_num)) { - break; + /* PXB buses assumed to be children of bus 0 */ + if (pci_bus_is_root(sec)) { + if (pci_root_bus_in_range(sec, bus_num)) { + break; + } + } else { + if (pci_secondary_bus_in_range(sec->parent_dev, bus_num)) { + break; + } } } } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 54851b7614..01f1e0490f 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -22,15 +22,19 @@ #include "hw/virtio/virtio-bus.h" #include "migration/migration.h" +static struct vhost_log *vhost_log; + static void vhost_dev_sync_region(struct vhost_dev *dev, MemoryRegionSection *section, uint64_t mfirst, uint64_t mlast, uint64_t rfirst, uint64_t rlast) { + vhost_log_chunk_t *log = dev->log->log; + uint64_t start = MAX(mfirst, rfirst); uint64_t end = MIN(mlast, rlast); - vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK; - vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1; + vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK; + vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1; uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK; if (end < start) { @@ -280,22 +284,57 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) } return log_size; } +static struct vhost_log *vhost_log_alloc(uint64_t size) +{ + struct vhost_log *log = g_malloc0(sizeof *log + size * sizeof(*(log->log))); + + log->size = size; + log->refcnt = 1; + + return log; +} + +static struct vhost_log *vhost_log_get(uint64_t size) +{ + if (!vhost_log || vhost_log->size != size) { + vhost_log = vhost_log_alloc(size); + } else { + ++vhost_log->refcnt; + } + + return vhost_log; +} + +static void vhost_log_put(struct vhost_dev *dev, bool sync) +{ + struct vhost_log *log = dev->log; + + if (!log) { + return; + } + + --log->refcnt; + if (log->refcnt == 0) { + /* Sync only the range covered by the old log */ + if (dev->log_size && sync) { + vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); + } + if (vhost_log == log) { + vhost_log = NULL; + } + g_free(log); + } +} static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size) { - vhost_log_chunk_t *log; - uint64_t log_base; + struct vhost_log *log = vhost_log_get(size); + uint64_t log_base = (uintptr_t)log->log; int r; - log = g_malloc0(size * sizeof *log); - log_base = (uintptr_t)log; r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base); assert(r >= 0); - /* Sync only the range covered by the old log */ - if (dev->log_size) { - vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); - } - g_free(dev->log); + vhost_log_put(dev, true); dev->log = log; dev->log_size = size; } @@ -601,7 +640,7 @@ static int vhost_migration_log(MemoryListener *listener, int enable) if (r < 0) { return r; } - g_free(dev->log); + vhost_log_put(dev, false); dev->log = NULL; dev->log_size = 0; } else { @@ -1060,10 +1099,10 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) uint64_t log_base; hdev->log_size = vhost_get_log_size(hdev); - hdev->log = hdev->log_size ? - g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL; - log_base = (uintptr_t)hdev->log; - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, &log_base); + hdev->log = vhost_log_get(hdev->log_size); + log_base = (uintptr_t)hdev->log->log; + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, + hdev->log_size ? &log_base : NULL); if (r < 0) { r = -errno; goto fail_log; @@ -1072,6 +1111,9 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) return 0; fail_log: + if (hdev->log_size) { + vhost_log_put(hdev, false); + } fail_vq: while (--i >= 0) { vhost_virtqueue_stop(hdev, @@ -1098,10 +1140,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) hdev->vqs + i, hdev->vq_index + i); } - vhost_log_sync_range(hdev, 0, ~0x0ull); + vhost_log_put(hdev, true); hdev->started = false; - g_free(hdev->log); hdev->log = NULL; hdev->log_size = 0; } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index d1ddc39b6f..6d4f64e282 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -632,21 +632,26 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, if (!virtio_queue_get_num(vdev, index)) { break; } - ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg); - if (ret < 0) { - goto undo; + if (index < proxy->nvqs_with_notifiers) { + ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg); + if (ret < 0) { + goto undo; + } + ++unmasked; } vq = virtio_vector_next_queue(vq); - ++unmasked; } return 0; undo: vq = virtio_vector_first_queue(vdev, vector); - while (vq && --unmasked >= 0) { + while (vq && unmasked >= 0) { index = virtio_get_queue_index(vq); - virtio_pci_vq_vector_mask(proxy, index, vector); + if (index < proxy->nvqs_with_notifiers) { + virtio_pci_vq_vector_mask(proxy, index, vector); + --unmasked; + } vq = virtio_vector_next_queue(vq); } return ret; @@ -664,7 +669,9 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) if (!virtio_queue_get_num(vdev, index)) { break; } - virtio_pci_vq_vector_mask(proxy, index, vector); + if (index < proxy->nvqs_with_notifiers) { + virtio_pci_vq_vector_mask(proxy, index, vector); + } vq = virtio_vector_next_queue(vq); } } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 596e3d8aaf..8ac6156861 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1003,7 +1003,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) vmstate_save_state(f, &vmstate_virtio, vdev, NULL); } -int virtio_set_features(VirtIODevice *vdev, uint32_t val) +int virtio_set_features(VirtIODevice *vdev, uint64_t val) { VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); bool bad = (val & ~(vdev->host_features)) != 0; diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h index 1f678b4bf2..b20bd55a67 100644 --- a/include/hw/acpi/acpi.h +++ b/include/hw/acpi/acpi.h @@ -91,6 +91,13 @@ /* PM2_CNT */ #define ACPI_BITMASK_ARB_DISABLE 0x0001 +/* These values are part of guest ABI, and can not be changed */ +typedef enum { + ACPI_PCI_HOTPLUG_STATUS = 2, + ACPI_CPU_HOTPLUG_STATUS = 4, + ACPI_MEMORY_HOTPLUG_STATUS = 8, +} AcpiGPEStatusBits; + /* structs */ typedef struct ACPIPMTimer ACPIPMTimer; typedef struct ACPIPM1EVT ACPIPM1EVT; @@ -160,7 +167,8 @@ void acpi_pm1_evt_init(ACPIREGS *ar, acpi_update_sci_fn update_sci, MemoryRegion *parent); /* PM1a_CNT: piix and ich9 don't implement PM1b CNT. */ -void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, uint8_t s4_val); +void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, + bool disable_s3, bool disable_s4, uint8_t s4_val); void acpi_pm1_cnt_update(ACPIREGS *ar, bool sci_enable, bool sci_disable); void acpi_pm1_cnt_reset(ACPIREGS *ar); @@ -172,6 +180,9 @@ void acpi_gpe_reset(ACPIREGS *ar); void acpi_gpe_ioport_writeb(ACPIREGS *ar, uint32_t addr, uint32_t val); uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr); +void acpi_send_gpe_event(ACPIREGS *ar, qemu_irq irq, + AcpiGPEStatusBits status); + void acpi_update_sci(ACPIREGS *acpi_regs, qemu_irq irq); /* acpi.c */ diff --git a/include/hw/acpi/memory_hotplug.h b/include/hw/acpi/memory_hotplug.h index 986223b16c..1342adb08f 100644 --- a/include/hw/acpi/memory_hotplug.h +++ b/include/hw/acpi/memory_hotplug.h @@ -5,8 +5,6 @@ #include "hw/acpi/acpi.h" #include "migration/vmstate.h" -#define ACPI_MEMORY_HOTPLUG_STATUS 8 - /** * MemStatus: * @is_removing: the memory device in slot has been requested to be ejected. diff --git a/include/hw/acpi/pc-hotplug.h b/include/hw/acpi/pc-hotplug.h index 0513c1c158..77b1569007 100644 --- a/include/hw/acpi/pc-hotplug.h +++ b/include/hw/acpi/pc-hotplug.h @@ -16,7 +16,6 @@ * ONLY DEFINEs are permited in this file since it's shared * between C and ASL code. */ -#define ACPI_CPU_HOTPLUG_STATUS 4 /* Limit for CPU arch IDs for CPU hotplug. All hotpluggable CPUs should * have CPUClass.get_arch_id() < ACPI_CPU_HOTPLUG_ID_LIMIT. diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 261155fe1a..bec6de1ddf 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -218,8 +218,7 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name); I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qemu_irq sci_irq, qemu_irq smi_irq, - int kvm_enabled, FWCfgState *fw_cfg, - DeviceState **piix4_pm); + int kvm_enabled, DeviceState **piix4_pm); void piix4_smbus_register_device(SMBusDevice *dev, uint8_t addr); /* hpet.c */ diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 6c2af0d46e..d44bc84d1e 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -91,6 +91,7 @@ #define PCI_DEVICE_ID_REDHAT_ROCKER 0x0006 #define PCI_DEVICE_ID_REDHAT_SDHCI 0x0007 #define PCI_DEVICE_ID_REDHAT_PCIE_HOST 0x0008 +#define PCI_DEVICE_ID_REDHAT_PXB 0x0009 #define PCI_DEVICE_ID_REDHAT_QXL 0x0100 #define FMT_PCIBUS PRIx64 @@ -346,6 +347,8 @@ typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin); #define TYPE_PCI_BUS "PCI" #define PCI_BUS(obj) OBJECT_CHECK(PCIBus, (obj), TYPE_PCI_BUS) +#define PCI_BUS_CLASS(klass) OBJECT_CLASS_CHECK(PCIBusClass, (klass), TYPE_PCI_BUS) +#define PCI_BUS_GET_CLASS(obj) OBJECT_GET_CLASS(PCIBusClass, (obj), TYPE_PCI_BUS) #define TYPE_PCIE_BUS "PCIE" bool pci_bus_is_express(PCIBus *bus); @@ -385,6 +388,7 @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus, PCIDevice *pci_vga_init(PCIBus *bus); int pci_bus_num(PCIBus *s); +int pci_bus_numa_node(PCIBus *bus); void pci_for_each_device(PCIBus *bus, int bus_num, void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque), void *opaque); diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h index fabaeee86b..403fec6e58 100644 --- a/include/hw/pci/pci_bus.h +++ b/include/hw/pci/pci_bus.h @@ -8,6 +8,16 @@ * use accessor functions in pci.h, pci_bridge.h */ +typedef struct PCIBusClass { + /*< private >*/ + BusClass parent_class; + /*< public >*/ + + bool (*is_root)(PCIBus *bus); + int (*bus_num)(PCIBus *bus); + uint16_t (*numa_node)(PCIBus *bus); +} PCIBusClass; + struct PCIBus { BusState qbus; PCIIOMMUFunc iommu_fn; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 8f04888e72..816a2e8db2 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -28,6 +28,12 @@ typedef unsigned long vhost_log_chunk_t; #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) #define VHOST_INVALID_FEATURE_BIT (0xff) +struct vhost_log { + unsigned long long size; + int refcnt; + vhost_log_chunk_t log[0]; +}; + struct vhost_memory; struct vhost_dev { MemoryListener memory_listener; @@ -43,7 +49,6 @@ struct vhost_dev { unsigned long long backend_features; bool started; bool log_enabled; - vhost_log_chunk_t *log; unsigned long long log_size; Error *migration_blocker; bool force; @@ -52,6 +57,7 @@ struct vhost_dev { hwaddr mem_changed_end_addr; const VhostOps *vhost_ops; void *opaque; + struct vhost_log *log; }; int vhost_dev_init(struct vhost_dev *hdev, void *opaque, diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 7222a904dc..2bb7c1ad1a 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -98,7 +98,7 @@ typedef struct VirtioDeviceClass { DeviceUnrealize unrealize; uint64_t (*get_features)(VirtIODevice *vdev, uint64_t requested_features); uint64_t (*bad_features)(VirtIODevice *vdev); - void (*set_features)(VirtIODevice *vdev, uint32_t val); + void (*set_features)(VirtIODevice *vdev, uint64_t val); void (*get_config)(VirtIODevice *vdev, uint8_t *config); void (*set_config)(VirtIODevice *vdev, const uint8_t *config); void (*reset)(VirtIODevice *vdev); @@ -184,7 +184,7 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector); void virtio_set_status(VirtIODevice *vdev, uint8_t val); void virtio_reset(void *opaque); void virtio_update_irq(VirtIODevice *vdev); -int virtio_set_features(VirtIODevice *vdev, uint32_t val); +int virtio_set_features(VirtIODevice *vdev, uint64_t val); /* Base devices. */ typedef struct VirtIOBlkConf VirtIOBlkConf; @@ -230,19 +230,19 @@ VirtQueue *virtio_vector_next_queue(VirtQueue *vq); static inline void virtio_add_feature(uint64_t *features, unsigned int fbit) { assert(fbit < 64); - *features |= (1 << fbit); + *features |= (1ULL << fbit); } static inline void virtio_clear_feature(uint64_t *features, unsigned int fbit) { assert(fbit < 64); - *features &= ~(1 << fbit); + *features &= ~(1ULL << fbit); } static inline bool __virtio_has_feature(uint64_t features, unsigned int fbit) { assert(fbit < 64); - return !!(features & (1 << fbit)); + return !!(features & (1ULL << fbit)); } static inline bool virtio_has_feature(VirtIODevice *vdev, unsigned int fbit) diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index e10c2c5217..853d90a317 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -137,6 +137,7 @@ extern const char *mem_path; extern int mem_prealloc; #define MAX_NODES 128 +#define NUMA_NODE_UNASSIGNED MAX_NODES /* The following shall be true for all CPUs: * cpu->cpu_index < max_cpus <= MAX_CPUMASK_BITS diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h index c1438907de..c8afa179e5 100644 --- a/include/sysemu/tpm.h +++ b/include/sysemu/tpm.h @@ -32,11 +32,13 @@ TPMVersion tpm_tis_get_tpm_version(Object *obj); static inline TPMVersion tpm_get_version(void) { +#ifdef CONFIG_TPM Object *obj = object_resolve_path_type("", TYPE_TPM_TIS, NULL); if (obj) { return tpm_tis_get_tpm_version(obj); } +#endif return TPM_VERSION_UNSPEC; } |