diff options
Diffstat (limited to 'hw')
49 files changed, 1560 insertions, 500 deletions
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 29d46d8786..7d3230c2a5 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -1,4 +1,5 @@ -common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o ich9.o pcihp.o +common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o pcihp.o +common-obj-$(CONFIG_ACPI_X86_ICH) += ich9.o tco.o common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o common-obj-$(CONFIG_ACPI) += acpi_interface.o diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index f4dc7a84be..5fb7a879d6 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -30,6 +30,7 @@ #include "qemu/timer.h" #include "sysemu/sysemu.h" #include "hw/acpi/acpi.h" +#include "hw/acpi/tco.h" #include "sysemu/kvm.h" #include "exec/address-spaces.h" @@ -92,8 +93,16 @@ static void ich9_smi_writel(void *opaque, hwaddr addr, uint64_t val, unsigned width) { ICH9LPCPMRegs *pm = opaque; + TCOIORegs *tr = &pm->tco_regs; + uint64_t tco_en; + switch (addr) { case 0: + tco_en = pm->smi_en & ICH9_PMIO_SMI_EN_TCO_EN; + /* once TCO_LOCK bit is set, TCO_EN bit cannot be overwritten */ + if (tr->tco.cnt1 & TCO_LOCK) { + val = (val & ~ICH9_PMIO_SMI_EN_TCO_EN) | tco_en; + } pm->smi_en &= ~pm->smi_en_wmask; pm->smi_en |= (val & pm->smi_en_wmask); break; @@ -159,6 +168,25 @@ static const VMStateDescription vmstate_memhp_state = { } }; +static bool vmstate_test_use_tco(void *opaque) +{ + ICH9LPCPMRegs *s = opaque; + return s->enable_tco; +} + +static const VMStateDescription vmstate_tco_io_state = { + .name = "ich9_pm/tco", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .needed = vmstate_test_use_tco, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(tco_regs, ICH9LPCPMRegs, 1, vmstate_tco_io_sts, + TCOIORegs), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ich9_pm = { .name = "ich9_pm", .version_id = 1, @@ -179,6 +207,10 @@ const VMStateDescription vmstate_ich9_pm = { .subsections = (const VMStateDescription*[]) { &vmstate_memhp_state, NULL + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_tco_io_state, + NULL } }; @@ -209,7 +241,8 @@ static void pm_powerdown_req(Notifier *n, void *opaque) acpi_pm1_evt_power_down(&pm->acpi_regs); } -void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, bool smm_enabled, +void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, + bool smm_enabled, bool enable_tco, qemu_irq sci_irq) { memory_region_init(&pm->io, OBJECT(lpc_pci), "ich9-pm", ICH9_PMIO_SIZE); @@ -232,6 +265,12 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, bool smm_enabled, memory_region_add_subregion(&pm->io, ICH9_PMIO_SMI_EN, &pm->io_smi); pm->smm_enabled = smm_enabled; + + pm->enable_tco = enable_tco; + if (pm->enable_tco) { + acpi_pm_tco_init(&pm->tco_regs, &pm->io); + } + pm->irq = sci_irq; qemu_register_reset(pm_reset, pm); pm->powerdown_notifier.notify = pm_powerdown_req; @@ -352,6 +391,18 @@ out: error_propagate(errp, local_err); } +static bool ich9_pm_get_enable_tco(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + return s->pm.enable_tco; +} + +static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + s->pm.enable_tco = value; +} + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; @@ -383,6 +434,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) ich9_pm_get_s4_val, ich9_pm_set_s4_val, NULL, pm, NULL); + object_property_add_bool(obj, ACPI_PM_PROP_TCO_ENABLED, + ich9_pm_get_enable_tco, + ich9_pm_set_enable_tco, + NULL); } void ich9_pm_device_plug_cb(ICH9LPCPMRegs *pm, DeviceState *dev, Error **errp) diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c new file mode 100644 index 0000000000..7a026c255b --- /dev/null +++ b/hw/acpi/tco.c @@ -0,0 +1,264 @@ +/* + * QEMU ICH9 TCO emulation + * + * Copyright (c) 2015 Paulo Alcantara <pcacjr@zytor.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu-common.h" +#include "sysemu/watchdog.h" +#include "hw/i386/ich9.h" + +#include "hw/acpi/tco.h" + +//#define DEBUG + +#ifdef DEBUG +#define TCO_DEBUG(fmt, ...) \ + do { \ + fprintf(stderr, "%s "fmt, __func__, ## __VA_ARGS__); \ + } while (0) +#else +#define TCO_DEBUG(fmt, ...) do { } while (0) +#endif + +enum { + TCO_RLD_DEFAULT = 0x0000, + TCO_DAT_IN_DEFAULT = 0x00, + TCO_DAT_OUT_DEFAULT = 0x00, + TCO1_STS_DEFAULT = 0x0000, + TCO2_STS_DEFAULT = 0x0000, + TCO1_CNT_DEFAULT = 0x0000, + TCO2_CNT_DEFAULT = 0x0008, + TCO_MESSAGE1_DEFAULT = 0x00, + TCO_MESSAGE2_DEFAULT = 0x00, + TCO_WDCNT_DEFAULT = 0x00, + TCO_TMR_DEFAULT = 0x0004, + SW_IRQ_GEN_DEFAULT = 0x03, +}; + +static inline void tco_timer_reload(TCOIORegs *tr) +{ + tr->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + ((int64_t)(tr->tco.tmr & TCO_TMR_MASK) * TCO_TICK_NSEC); + timer_mod(tr->tco_timer, tr->expire_time); +} + +static inline void tco_timer_stop(TCOIORegs *tr) +{ + tr->expire_time = -1; +} + +static void tco_timer_expired(void *opaque) +{ + TCOIORegs *tr = opaque; + ICH9LPCPMRegs *pm = container_of(tr, ICH9LPCPMRegs, tco_regs); + ICH9LPCState *lpc = container_of(pm, ICH9LPCState, pm); + uint32_t gcs = pci_get_long(lpc->chip_config + ICH9_CC_GCS); + + tr->tco.rld = 0; + tr->tco.sts1 |= TCO_TIMEOUT; + if (++tr->timeouts_no == 2) { + tr->tco.sts2 |= TCO_SECOND_TO_STS; + tr->tco.sts2 |= TCO_BOOT_STS; + tr->timeouts_no = 0; + + if (!lpc->pin_strap.spkr_hi && !(gcs & ICH9_CC_GCS_NO_REBOOT)) { + watchdog_perform_action(); + tco_timer_stop(tr); + return; + } + } + + if (pm->smi_en & ICH9_PMIO_SMI_EN_TCO_EN) { + ich9_generate_smi(); + } else { + ich9_generate_nmi(); + } + tr->tco.rld = tr->tco.tmr; + tco_timer_reload(tr); +} + +/* NOTE: values of 0 or 1 will be ignored by ICH */ +static inline int can_start_tco_timer(TCOIORegs *tr) +{ + return !(tr->tco.cnt1 & TCO_TMR_HLT) && tr->tco.tmr > 1; +} + +static uint32_t tco_ioport_readw(TCOIORegs *tr, uint32_t addr) +{ + uint16_t rld; + + switch (addr) { + case TCO_RLD: + if (tr->expire_time != -1) { + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + int64_t elapsed = (tr->expire_time - now) / TCO_TICK_NSEC; + rld = (uint16_t)elapsed | (tr->tco.rld & ~TCO_RLD_MASK); + } else { + rld = tr->tco.rld; + } + return rld; + case TCO_DAT_IN: + return tr->tco.din; + case TCO_DAT_OUT: + return tr->tco.dout; + case TCO1_STS: + return tr->tco.sts1; + case TCO2_STS: + return tr->tco.sts2; + case TCO1_CNT: + return tr->tco.cnt1; + case TCO2_CNT: + return tr->tco.cnt2; + case TCO_MESSAGE1: + return tr->tco.msg1; + case TCO_MESSAGE2: + return tr->tco.msg2; + case TCO_WDCNT: + return tr->tco.wdcnt; + case TCO_TMR: + return tr->tco.tmr; + case SW_IRQ_GEN: + return tr->sw_irq_gen; + } + return 0; +} + +static void tco_ioport_writew(TCOIORegs *tr, uint32_t addr, uint32_t val) +{ + switch (addr) { + case TCO_RLD: + tr->timeouts_no = 0; + if (can_start_tco_timer(tr)) { + tr->tco.rld = tr->tco.tmr; + tco_timer_reload(tr); + } else { + tr->tco.rld = val; + } + break; + case TCO_DAT_IN: + tr->tco.din = val; + tr->tco.sts1 |= SW_TCO_SMI; + ich9_generate_smi(); + break; + case TCO_DAT_OUT: + tr->tco.dout = val; + tr->tco.sts1 |= TCO_INT_STS; + /* TODO: cause an interrupt, as selected by the TCO_INT_SEL bits */ + break; + case TCO1_STS: + tr->tco.sts1 = val & TCO1_STS_MASK; + break; + case TCO2_STS: + tr->tco.sts2 = val & TCO2_STS_MASK; + break; + case TCO1_CNT: + val &= TCO1_CNT_MASK; + /* + * once TCO_LOCK bit is set, it can not be cleared by software. a reset + * is required to change this bit from 1 to 0 -- it defaults to 0. + */ + tr->tco.cnt1 = val | (tr->tco.cnt1 & TCO_LOCK); + if (can_start_tco_timer(tr)) { + tr->tco.rld = tr->tco.tmr; + tco_timer_reload(tr); + } else { + tco_timer_stop(tr); + } + break; + case TCO2_CNT: + tr->tco.cnt2 = val; + break; + case TCO_MESSAGE1: + tr->tco.msg1 = val; + break; + case TCO_MESSAGE2: + tr->tco.msg2 = val; + break; + case TCO_WDCNT: + tr->tco.wdcnt = val; + break; + case TCO_TMR: + tr->tco.tmr = val; + break; + case SW_IRQ_GEN: + tr->sw_irq_gen = val; + break; + } +} + +static uint64_t tco_io_readw(void *opaque, hwaddr addr, unsigned width) +{ + TCOIORegs *tr = opaque; + return tco_ioport_readw(tr, addr); +} + +static void tco_io_writew(void *opaque, hwaddr addr, uint64_t val, + unsigned width) +{ + TCOIORegs *tr = opaque; + tco_ioport_writew(tr, addr, val); +} + +static const MemoryRegionOps tco_io_ops = { + .read = tco_io_readw, + .write = tco_io_writew, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .impl.min_access_size = 1, + .impl.max_access_size = 2, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +void acpi_pm_tco_init(TCOIORegs *tr, MemoryRegion *parent) +{ + *tr = (TCOIORegs) { + .tco = { + .rld = TCO_RLD_DEFAULT, + .din = TCO_DAT_IN_DEFAULT, + .dout = TCO_DAT_OUT_DEFAULT, + .sts1 = TCO1_STS_DEFAULT, + .sts2 = TCO2_STS_DEFAULT, + .cnt1 = TCO1_CNT_DEFAULT, + .cnt2 = TCO2_CNT_DEFAULT, + .msg1 = TCO_MESSAGE1_DEFAULT, + .msg2 = TCO_MESSAGE2_DEFAULT, + .wdcnt = TCO_WDCNT_DEFAULT, + .tmr = TCO_TMR_DEFAULT, + }, + .sw_irq_gen = SW_IRQ_GEN_DEFAULT, + .tco_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, tco_timer_expired, tr), + .expire_time = -1, + .timeouts_no = 0, + }; + memory_region_init_io(&tr->io, memory_region_owner(parent), + &tco_io_ops, tr, "sm-tco", ICH9_PMIO_TCO_LEN); + memory_region_add_subregion(parent, ICH9_PMIO_TCO_RLD, &tr->io); +} + +const VMStateDescription vmstate_tco_io_sts = { + .name = "tco io device status", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT16(tco.rld, TCOIORegs), + VMSTATE_UINT8(tco.din, TCOIORegs), + VMSTATE_UINT8(tco.dout, TCOIORegs), + VMSTATE_UINT16(tco.sts1, TCOIORegs), + VMSTATE_UINT16(tco.sts2, TCOIORegs), + VMSTATE_UINT16(tco.cnt1, TCOIORegs), + VMSTATE_UINT16(tco.cnt2, TCOIORegs), + VMSTATE_UINT8(tco.msg1, TCOIORegs), + VMSTATE_UINT8(tco.msg2, TCOIORegs), + VMSTATE_UINT8(tco.wdcnt, TCOIORegs), + VMSTATE_UINT16(tco.tmr, TCOIORegs), + VMSTATE_UINT8(sw_irq_gen, TCOIORegs), + VMSTATE_TIMER_PTR(tco_timer, TCOIORegs), + VMSTATE_INT64(expire_time, TCOIORegs), + VMSTATE_UINT8(timeouts_no, TCOIORegs), + VMSTATE_END_OF_LIST() + } +}; diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c index 4e464bd15a..36b328b9af 100644 --- a/hw/char/spapr_vty.c +++ b/hw/char/spapr_vty.c @@ -74,7 +74,7 @@ static void spapr_vty_realize(VIOsPAPRDevice *sdev, Error **errp) } /* Forward declaration */ -static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -101,7 +101,7 @@ static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_get_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_get_term_char(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -193,7 +193,7 @@ VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus) DeviceState *iter = kid->child; /* Only look at VTY devices */ - if (!object_dynamic_cast(OBJECT(iter), "spapr-vty")) { + if (!object_dynamic_cast(OBJECT(iter), TYPE_VIO_SPAPR_VTY_DEVICE)) { continue; } @@ -214,7 +214,7 @@ VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus) return selected; } -VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg) +VIOsPAPRDevice *vty_lookup(sPAPRMachineState *spapr, target_ulong reg) { VIOsPAPRDevice *sdev; @@ -228,6 +228,10 @@ VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg) return spapr_vty_get_default(spapr->vio_bus); } + if (!object_dynamic_cast(OBJECT(sdev), TYPE_VIO_SPAPR_VTY_DEVICE)) { + return NULL; + } + return sdev; } diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c index 278a2d1bdd..3c58629894 100644 --- a/hw/core/sysbus.c +++ b/hw/core/sysbus.c @@ -109,7 +109,13 @@ qemu_irq sysbus_get_connected_irq(SysBusDevice *dev, int n) void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq irq) { + SysBusDeviceClass *sbd = SYS_BUS_DEVICE_GET_CLASS(dev); + qdev_connect_gpio_out_named(DEVICE(dev), SYSBUS_DEVICE_GPIO_IRQ, n, irq); + + if (sbd->connect_irq_notifier) { + sbd->connect_irq_notifier(dev, irq); + } } /* Check whether an MMIO region exists */ diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c index f0f25c7bc9..5bc62cf344 100644 --- a/hw/display/virtio-gpu-pci.c +++ b/hw/display/virtio-gpu-pci.c @@ -17,7 +17,6 @@ #include "hw/virtio/virtio-gpu.h" static Property virtio_gpu_pci_properties[] = { - DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPUPCI, vdev.conf), DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy), DEFINE_PROP_END_OF_LIST(), }; @@ -25,13 +24,21 @@ static Property virtio_gpu_pci_properties[] = { static void virtio_gpu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VirtIOGPUPCI *vgpu = VIRTIO_GPU_PCI(vpci_dev); + VirtIOGPU *g = &vgpu->vdev; DeviceState *vdev = DEVICE(&vgpu->vdev); + int i; qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); /* force virtio-1.0 */ vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN; vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY; object_property_set_bool(OBJECT(vdev), true, "realized", errp); + + for (i = 0; i < g->conf.max_outputs; i++) { + object_property_set_link(OBJECT(g->scanout[i].con), + OBJECT(vpci_dev), + "device", errp); + } } static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data) @@ -49,8 +56,9 @@ static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data) static void virtio_gpu_initfn(Object *obj) { VirtIOGPUPCI *dev = VIRTIO_GPU_PCI(obj); - object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU); - object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_GPU); } static const TypeInfo virtio_gpu_pci_info = { diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 8c109b79f4..990a26b850 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -871,7 +871,7 @@ static void virtio_gpu_reset(VirtIODevice *vdev) } static Property virtio_gpu_properties[] = { - DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPU, conf), + DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index 94f9d0eb5a..f7e539fe93 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -79,6 +79,7 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp) VirtIOGPU *g = &vvga->vdev; VGACommonState *vga = &vvga->vga; uint32_t offset; + int i; /* init vga compat bits */ vga->vram_size_mb = 8; @@ -120,6 +121,12 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp) vga->con = g->scanout[0].con; graphic_console_set_hwops(vga->con, &virtio_vga_ops, vvga); + + for (i = 0; i < g->conf.max_outputs; i++) { + object_property_set_link(OBJECT(g->scanout[i].con), + OBJECT(vpci_dev), + "device", errp); + } } static void virtio_vga_reset(DeviceState *dev) @@ -131,7 +138,6 @@ static void virtio_vga_reset(DeviceState *dev) } static Property virtio_vga_properties[] = { - DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOVGA, vdev.conf), DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy), DEFINE_PROP_END_OF_LIST(), }; @@ -155,8 +161,9 @@ static void virtio_vga_class_init(ObjectClass *klass, void *data) static void virtio_vga_inst_initfn(Object *obj) { VirtIOVGA *dev = VIRTIO_VGA(obj); - object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU); - object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_GPU); } static TypeInfo virtio_vga_info = { diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7959b44b6b..7661ea9cdf 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -293,11 +293,82 @@ static void pc_boot_set(void *opaque, const char *boot_device, Error **errp) set_boot_dev(opaque, boot_device, errp); } +static void pc_cmos_init_floppy(ISADevice *rtc_state, ISADevice *floppy) +{ + int val, nb, i; + FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE }; + + /* floppy type */ + if (floppy) { + for (i = 0; i < 2; i++) { + fd_type[i] = isa_fdc_get_drive_type(floppy, i); + } + } + val = (cmos_get_fd_drive_type(fd_type[0]) << 4) | + cmos_get_fd_drive_type(fd_type[1]); + rtc_set_memory(rtc_state, 0x10, val); + + val = rtc_get_memory(rtc_state, REG_EQUIPMENT_BYTE); + nb = 0; + if (fd_type[0] < FDRIVE_DRV_NONE) { + nb++; + } + if (fd_type[1] < FDRIVE_DRV_NONE) { + nb++; + } + switch (nb) { + case 0: + break; + case 1: + val |= 0x01; /* 1 drive, ready for boot */ + break; + case 2: + val |= 0x41; /* 2 drives, ready for boot */ + break; + } + rtc_set_memory(rtc_state, REG_EQUIPMENT_BYTE, val); +} + typedef struct pc_cmos_init_late_arg { ISADevice *rtc_state; BusState *idebus[2]; } pc_cmos_init_late_arg; +typedef struct check_fdc_state { + ISADevice *floppy; + bool multiple; +} CheckFdcState; + +static int check_fdc(Object *obj, void *opaque) +{ + CheckFdcState *state = opaque; + Object *fdc; + uint32_t iobase; + Error *local_err = NULL; + + fdc = object_dynamic_cast(obj, TYPE_ISA_FDC); + if (!fdc) { + return 0; + } + + iobase = object_property_get_int(obj, "iobase", &local_err); + if (local_err || iobase != 0x3f0) { + error_free(local_err); + return 0; + } + + if (state->floppy) { + state->multiple = true; + } else { + state->floppy = ISA_DEVICE(obj); + } + return 0; +} + +static const char * const fdc_container_path[] = { + "/unattached", "/peripheral", "/peripheral-anon" +}; + static void pc_cmos_init_late(void *opaque) { pc_cmos_init_late_arg *arg = opaque; @@ -306,6 +377,8 @@ static void pc_cmos_init_late(void *opaque) int8_t heads, sectors; int val; int i, trans; + Object *container; + CheckFdcState state = { 0 }; val = 0; if (ide_get_geometry(arg->idebus[0], 0, @@ -335,16 +408,32 @@ static void pc_cmos_init_late(void *opaque) } rtc_set_memory(s, 0x39, val); + /* + * Locate the FDC at IO address 0x3f0, and configure the CMOS registers + * accordingly. + */ + for (i = 0; i < ARRAY_SIZE(fdc_container_path); i++) { + container = container_get(qdev_get_machine(), fdc_container_path[i]); + object_child_foreach(container, check_fdc, &state); + } + + if (state.multiple) { + error_report("warning: multiple floppy disk controllers with " + "iobase=0x3f0 have been found;\n" + "the one being picked for CMOS setup might not reflect " + "your intent"); + } + pc_cmos_init_floppy(s, state.floppy); + qemu_unregister_reset(pc_cmos_init_late, opaque); } void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, const char *boot_device, MachineState *machine, - ISADevice *floppy, BusState *idebus0, BusState *idebus1, + BusState *idebus0, BusState *idebus1, ISADevice *s) { - int val, nb, i; - FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE }; + int val; static pc_cmos_init_late_arg arg; PCMachineState *pc_machine = PC_MACHINE(machine); Error *local_err = NULL; @@ -401,39 +490,12 @@ void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, exit(1); } - /* floppy type */ - if (floppy) { - for (i = 0; i < 2; i++) { - fd_type[i] = isa_fdc_get_drive_type(floppy, i); - } - } - val = (cmos_get_fd_drive_type(fd_type[0]) << 4) | - cmos_get_fd_drive_type(fd_type[1]); - rtc_set_memory(s, 0x10, val); - val = 0; - nb = 0; - if (fd_type[0] < FDRIVE_DRV_NONE) { - nb++; - } - if (fd_type[1] < FDRIVE_DRV_NONE) { - nb++; - } - switch (nb) { - case 0: - break; - case 1: - val |= 0x01; /* 1 drive, ready for boot */ - break; - case 2: - val |= 0x41; /* 2 drives, ready for boot */ - break; - } val |= 0x02; /* FPU is there */ val |= 0x04; /* PS/2 mouse installed */ rtc_set_memory(s, REG_EQUIPMENT_BYTE, val); - /* hard drives */ + /* hard drives and FDC */ arg.rtc_state = s; arg.idebus[0] = idebus0; arg.idebus[1] = idebus1; @@ -1401,7 +1463,6 @@ static const MemoryRegionOps ioportF0_io_ops = { void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, ISADevice **rtc_state, bool create_fdctrl, - ISADevice **floppy, bool no_vmport, uint32 hpet_irqs) { @@ -1497,7 +1558,9 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, fd[i] = drive_get(IF_FLOPPY, 0, i); create_fdctrl |= !!fd[i]; } - *floppy = create_fdctrl ? fdctrl_init_isa(isa_bus, fd) : NULL; + if (create_fdctrl) { + fdctrl_init_isa(isa_bus, fd); + } } void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 56cdcb9661..8167b122f0 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -94,7 +94,6 @@ static void pc_init1(MachineState *machine) DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; BusState *idebus[MAX_IDE_BUS]; ISADevice *rtc_state; - ISADevice *floppy; MemoryRegion *ram_memory; MemoryRegion *pci_memory; MemoryRegion *rom_memory; @@ -241,7 +240,7 @@ static void pc_init1(MachineState *machine) } /* init basic PC hardware */ - pc_basic_device_init(isa_bus, gsi, &rtc_state, true, &floppy, + pc_basic_device_init(isa_bus, gsi, &rtc_state, true, (pc_machine->vmport != ON_OFF_AUTO_ON), 0x4); pc_nic_init(isa_bus, pci_bus); @@ -273,7 +272,7 @@ static void pc_init1(MachineState *machine) } pc_cmos_init(below_4g_mem_size, above_4g_mem_size, machine->boot_order, - machine, floppy, idebus[0], idebus[1], rtc_state); + machine, idebus[0], idebus[1], rtc_state); if (pci_enabled && usb_enabled()) { pci_create_simple(pci_bus, piix3_devfn + 2, "piix3-usb-uhci"); @@ -312,6 +311,8 @@ static void pc_compat_2_3(MachineState *machine) if (kvm_enabled()) { pcms->smm = ON_OFF_AUTO_OFF; } + global_state_set_optional(); + savevm_skip_configuration(); } static void pc_compat_2_2(MachineState *machine) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 8aa3a67fdf..974aead5a9 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -73,7 +73,6 @@ static void pc_q35_init(MachineState *machine) PCIDevice *lpc; BusState *idebus[MAX_SATA_PORTS]; ISADevice *rtc_state; - ISADevice *floppy; MemoryRegion *pci_memory; MemoryRegion *rom_memory; MemoryRegion *ram_memory; @@ -249,11 +248,11 @@ static void pc_q35_init(MachineState *machine) } /* init basic PC hardware */ - pc_basic_device_init(isa_bus, gsi, &rtc_state, !mc->no_floppy, &floppy, + pc_basic_device_init(isa_bus, gsi, &rtc_state, !mc->no_floppy, (pc_machine->vmport != ON_OFF_AUTO_ON), 0xff0104); /* connect pm stuff to lpc */ - ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pc_machine)); + ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pc_machine), !mc->no_tco); /* ahci and SATA device, for q35 1 ahci controller is built-in */ ahci = pci_create_simple_multifunction(host_bus, @@ -278,7 +277,7 @@ static void pc_q35_init(MachineState *machine) 8, NULL, 0); pc_cmos_init(below_4g_mem_size, above_4g_mem_size, machine->boot_order, - machine, floppy, idebus[0], idebus[1], rtc_state); + machine, idebus[0], idebus[1], rtc_state); /* the rest devices to which pci devfn is automatically assigned */ pc_vga_init(isa_bus, host_bus); @@ -295,6 +294,8 @@ static void pc_compat_2_3(MachineState *machine) if (kvm_enabled()) { pcms->smm = ON_OFF_AUTO_OFF; } + global_state_set_optional(); + savevm_skip_configuration(); } static void pc_compat_2_2(MachineState *machine) @@ -397,6 +398,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m) m->default_machine_opts = "firmware=bios-256k.bin"; m->default_display = "std"; m->no_floppy = 1; + m->no_tco = 0; m->alias = "q35"; } @@ -408,6 +410,7 @@ static void pc_q35_2_3_machine_options(MachineClass *m) { pc_q35_2_4_machine_options(m); m->no_floppy = 0; + m->no_tco = 1; m->alias = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_2_3); } diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h index 9f5b4d20b5..68d5074b33 100644 --- a/hw/ide/ahci.h +++ b/hw/ide/ahci.h @@ -166,7 +166,7 @@ #define AHCI_CMD_HDR_CMD_FIS_LEN 0x1f #define AHCI_CMD_HDR_PRDT_LEN 16 -#define SATA_SIGNATURE_CDROM 0xeb140000 +#define SATA_SIGNATURE_CDROM 0xeb140101 #define SATA_SIGNATURE_DISK 0x00000101 #define AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR 0x20 diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c index 2cb7d255d2..f56bff1afb 100644 --- a/hw/intc/arm_gic_kvm.c +++ b/hw/intc/arm_gic_kvm.c @@ -570,6 +570,12 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp) */ i += (GIC_INTERNAL * s->num_cpu); qdev_init_gpio_in(dev, kvm_arm_gic_set_irq, i); + + for (i = 0; i < s->num_irq - GIC_INTERNAL; i++) { + qemu_irq irq = qdev_get_gpio_in(dev, i); + kvm_irqchip_set_qemuirq_gsi(kvm_state, irq, i); + } + /* We never use our outbound IRQ/FIQ lines but provide them so that * we maintain the same interface as the non-KVM GIC. */ diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 0fd2a84c7b..924b1ae3c9 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -806,7 +806,7 @@ void xics_free(XICSState *icp, int irq, int num) * Guest interfaces */ -static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -816,7 +816,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_ipi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong server = get_cpu_index_by_dt_id(args[0]); @@ -830,7 +830,7 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_xirr(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -840,7 +840,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -852,7 +852,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_eoi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -862,7 +862,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -874,7 +874,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static void rtas_set_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -902,7 +902,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_get_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -927,7 +927,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority); } -static void rtas_int_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -953,7 +953,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_int_on(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index c15453f26f..d58729cfae 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -331,6 +331,15 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu) abort(); } + /* + * If we are reusing a parked vCPU fd corresponding to the CPU + * which was hot-removed earlier we don't have to renable + * KVM_CAP_IRQ_XICS capability again. + */ + if (ss->cap_irq_xics_enabled) { + return; + } + if (icpkvm->kernel_xics_fd != -1) { int ret; @@ -343,6 +352,7 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu) kvm_arch_vcpu_id(cs), strerror(errno)); exit(1); } + ss->cap_irq_xics_enabled = true; } } @@ -368,7 +378,7 @@ static void xics_kvm_set_nr_servers(XICSState *icp, uint32_t nr_servers, } } -static void rtas_dummy(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index bd655b8405..360699f6fd 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -138,6 +138,7 @@ static void ich9_cc_reset(ICH9LPCState *lpc) pci_set_long(c + ICH9_CC_D27IR, ICH9_CC_DIR_DEFAULT); pci_set_long(c + ICH9_CC_D26IR, ICH9_CC_DIR_DEFAULT); pci_set_long(c + ICH9_CC_D25IR, ICH9_CC_DIR_DEFAULT); + pci_set_long(c + ICH9_CC_GCS, ICH9_CC_GCS_DEFAULT); ich9_cc_update(lpc); } @@ -313,6 +314,16 @@ PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin) return route; } +void ich9_generate_smi(void) +{ + cpu_interrupt(first_cpu, CPU_INTERRUPT_SMI); +} + +void ich9_generate_nmi(void) +{ + cpu_interrupt(first_cpu, CPU_INTERRUPT_NMI); +} + static int ich9_lpc_sci_irq(ICH9LPCState *lpc) { switch (lpc->d.config[ICH9_LPC_ACPI_CTRL] & @@ -357,13 +368,13 @@ static void ich9_set_sci(void *opaque, int irq_num, int level) } } -void ich9_lpc_pm_init(PCIDevice *lpc_pci, bool smm_enabled) +void ich9_lpc_pm_init(PCIDevice *lpc_pci, bool smm_enabled, bool enable_tco) { ICH9LPCState *lpc = ICH9_LPC_DEVICE(lpc_pci); qemu_irq sci_irq; sci_irq = qemu_allocate_irq(ich9_set_sci, lpc, 0); - ich9_pm_init(lpc_pci, &lpc->pm, smm_enabled, sci_irq); + ich9_pm_init(lpc_pci, &lpc->pm, smm_enabled, enable_tco, sci_irq); ich9_lpc_reset(&lpc->d.qdev); } @@ -681,6 +692,11 @@ static const VMStateDescription vmstate_ich9_lpc = { } }; +static Property ich9_lpc_properties[] = { + DEFINE_PROP_BOOL("noreboot", ICH9LPCState, pin_strap.spkr_hi, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void ich9_lpc_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -692,6 +708,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) dc->reset = ich9_lpc_reset; k->init = ich9_lpc_init; dc->vmsd = &vmstate_ich9_lpc; + dc->props = ich9_lpc_properties; k->config_write = ich9_lpc_config_write; dc->desc = "ICH9 LPC bridge"; k->vendor_id = PCI_VENDOR_ID_INTEL; diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c index e9037b0c39..e3c0242d41 100644 --- a/hw/misc/macio/macio.c +++ b/hw/misc/macio/macio.c @@ -132,8 +132,6 @@ static void macio_common_realize(PCIDevice *d, Error **errp) SysBusDevice *sysbus_dev; Error *err = NULL; - d->config[0x3d] = 0x01; // interrupt on pin 1 - object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err); if (err) { error_propagate(errp, err); diff --git a/hw/net/e1000.c b/hw/net/e1000.c index bab8e2abfb..5c6bcd0014 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -185,6 +185,9 @@ e1000_link_up(E1000State *s) { s->mac_reg[STATUS] |= E1000_STATUS_LU; s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS; + + /* E1000_STATUS_LU is tested by e1000_can_receive() */ + qemu_flush_queued_packets(qemu_get_queue(s->nic)); } static bool diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index 4d25842509..47d080fd33 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -96,7 +96,7 @@ World *rocker_get_world(Rocker *r, enum rocker_world_type type) RockerSwitch *qmp_query_rocker(const char *name, Error **errp) { - RockerSwitch *rocker = g_malloc0(sizeof(*rocker)); + RockerSwitch *rocker; Rocker *r; r = rocker_find(name); @@ -106,6 +106,7 @@ RockerSwitch *qmp_query_rocker(const char *name, Error **errp) return NULL; } + rocker = g_new0(RockerSwitch, 1); rocker->name = g_strdup(r->name); rocker->id = r->switch_id; rocker->ports = r->fp_ports; @@ -192,11 +193,13 @@ static int tx_consume(Rocker *r, DescInfo *info) if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { return -ROCKER_EINVAL; } + break; case ROCKER_TX_OFFLOAD_TSO: if (!tlvs[ROCKER_TLV_TX_TSO_MSS] || !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) { return -ROCKER_EINVAL; } + break; } if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { @@ -600,7 +603,7 @@ static DescRing *rocker_get_rx_ring_by_pport(Rocker *r, } int rx_produce(World *world, uint32_t pport, - const struct iovec *iov, int iovcnt) + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu) { Rocker *r = world_rocker(world); PCIDevice *dev = (PCIDevice *)r; @@ -643,6 +646,10 @@ int rx_produce(World *world, uint32_t pport, goto out; } + if (copy_to_cpu) { + rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD; + } + /* XXX calc rx flags/csum */ tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */ diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h index b3310b61eb..f9c80f8013 100644 --- a/hw/net/rocker/rocker.h +++ b/hw/net/rocker/rocker.h @@ -77,7 +77,7 @@ int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up); int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr, uint16_t vlan_id); int rx_produce(World *world, uint32_t pport, - const struct iovec *iov, int iovcnt); + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu); int rocker_port_eg(Rocker *r, uint32_t pport, const struct iovec *iov, int iovcnt); diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c index d8d934c396..c693ae5081 100644 --- a/hw/net/rocker/rocker_fp.c +++ b/hw/net/rocker/rocker_fp.c @@ -125,18 +125,21 @@ int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt) return ROCKER_OK; } -static int fp_port_can_receive(NetClientState *nc) -{ - FpPort *port = qemu_get_nic_opaque(nc); - - return port->enabled; -} - static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) { FpPort *port = qemu_get_nic_opaque(nc); + /* If the port is disabled, we want to drop this pkt + * now rather than queing it for later. We don't want + * any stale pkts getting into the device when the port + * transitions to enabled. + */ + + if (!port->enabled) { + return -1; + } + return world_ingress(port->world, port->pport, iov, iovcnt); } @@ -165,7 +168,6 @@ static void fp_port_set_link_status(NetClientState *nc) static NetClientInfo fp_port_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), - .can_receive = fp_port_can_receive, .receive = fp_port_receive, .receive_iov = fp_port_receive_iov, .cleanup = fp_port_cleanup, diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h index fe639badd4..8c50830325 100644 --- a/hw/net/rocker/rocker_hw.h +++ b/hw/net/rocker/rocker_hw.h @@ -250,6 +250,7 @@ enum { #define ROCKER_RX_FLAGS_TCP (1 << 5) #define ROCKER_RX_FLAGS_UDP (1 << 6) #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7) +#define ROCKER_RX_FLAGS_FWD_OFFLOAD (1 << 8) /* Tx msg */ enum { diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index b25a17d6d7..874fb01d69 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -825,6 +825,8 @@ static OfDpaGroup *of_dpa_group_alloc(uint32_t id) static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, OfDpaGroup *group) { + uint8_t copy_to_cpu = fc->action_set.apply.copy_to_cpu; + if (group->l2_interface.pop_vlan) { of_dpa_flow_pkt_strip_vlan(fc); } @@ -837,7 +839,8 @@ static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, */ if (group->l2_interface.out_pport == 0) { - rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt); + rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt, + copy_to_cpu); } else if (group->l2_interface.out_pport != fc->in_pport) { rocker_port_eg(world_rocker(fc->of_dpa->world), group->l2_interface.out_pport, @@ -2525,7 +2528,6 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data) ngroup->has_set_vlan_id = true; ngroup->set_vlan_id = ntohs(group->l2_rewrite.vlan_id); } - break; if (memcmp(group->l2_rewrite.src_mac.a, zero_mac.a, ETH_ALEN)) { ngroup->has_set_eth_src = true; ngroup->set_eth_src = @@ -2536,6 +2538,7 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data) ngroup->set_eth_dst = qemu_mac_strdup_printf(group->l2_rewrite.dst_mac.a); } + break; case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: ngroup->has_vlan_id = true; diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c index b991e871d3..a6b18f1754 100644 --- a/hw/net/rocker/rocker_world.c +++ b/hw/net/rocker/rocker_world.c @@ -32,7 +32,7 @@ ssize_t world_ingress(World *world, uint32_t pport, return world->ops->ig(world, pport, iov, iovcnt); } - return iov_size(iov, iovcnt); + return -1; } int world_do_cmd(World *world, DescInfo *info, diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 2dd5ec1117..1ca5e9ce6a 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -284,7 +284,7 @@ static int check_bd(VIOsPAPRVLANDevice *dev, vlan_bd_t bd, } static target_ulong h_register_logical_lan(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { @@ -349,7 +349,8 @@ static target_ulong h_register_logical_lan(PowerPCCPU *cpu, } -static target_ulong h_free_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_free_logical_lan(PowerPCCPU *cpu, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -371,7 +372,7 @@ static target_ulong h_free_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, } static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { @@ -421,7 +422,8 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, return H_SUCCESS; } -static target_ulong h_send_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_send_logical_lan(PowerPCCPU *cpu, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -490,7 +492,7 @@ static target_ulong h_send_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 104a0f599b..706e0606a9 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -1879,6 +1879,12 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) return -1; } + if (s->peer_has_vhdr) { + vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); + buf += sizeof(struct virtio_net_hdr); + size -= sizeof(struct virtio_net_hdr); + } + /* Pad to minimum Ethernet frame length */ if (size < sizeof(min_buf)) { memcpy(min_buf, buf, size); @@ -1887,12 +1893,6 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) size = sizeof(min_buf); } - if (s->peer_has_vhdr) { - vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); - buf += sizeof(struct virtio_net_hdr); - size -= sizeof(struct virtio_net_hdr); - } - vmxnet_rx_pkt_set_packet_type(s->rx_pkt, get_eth_packet_type(PKT_GET_ETH_HDR(buf))); diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 11332d14ea..fcaa77dd9a 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -45,7 +45,7 @@ typedef struct sPAPRNVRAM { #define DEFAULT_NVRAM_SIZE 65536 #define MAX_NVRAM_SIZE 1048576 -static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -86,7 +86,7 @@ static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 1, len); } -static void rtas_nvram_store(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 14c77117f6..6e28985bd1 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -78,7 +78,7 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) PCI_EXP_LNK_LS_25); pci_set_word(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25); + PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25 |PCI_EXP_LNKSTA_DLLLA); pci_set_long(exp_cap + PCI_EXP_DEVCAP2, PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP); diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 0f3e34122a..77d5c819ef 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -145,7 +145,6 @@ static void ppc_core99_reset(void *opaque) static void ppc_core99_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -182,14 +181,15 @@ static void ppc_core99_init(MachineState *machine) linux_boot = (kernel_filename != NULL); /* init CPUs */ - if (cpu_model == NULL) + if (machine->cpu_model == NULL) { #ifdef TARGET_PPC64 - cpu_model = "970fx"; + machine->cpu_model = "970fx"; #else - cpu_model = "G4"; + machine->cpu_model = "G4"; #endif + } for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 99879dd2d5..06fdbaf588 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -75,7 +75,6 @@ static void ppc_heathrow_reset(void *opaque) static void ppc_heathrow_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -107,10 +106,10 @@ static void ppc_heathrow_init(MachineState *machine) linux_boot = (kernel_filename != NULL); /* init CPUs */ - if (cpu_model == NULL) - cpu_model = "G3"; + if (machine->cpu_model == NULL) + machine->cpu_model = "G3"; for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c index 778970aa9b..032fa803db 100644 --- a/hw/ppc/ppc440_bamboo.c +++ b/hw/ppc/ppc440_bamboo.c @@ -159,7 +159,6 @@ static void main_cpu_reset(void *opaque) static void bamboo_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -184,10 +183,10 @@ static void bamboo_init(MachineState *machine) int i; /* Setup CPU. */ - if (cpu_model == NULL) { - cpu_model = "440EP"; + if (machine->cpu_model == NULL) { + machine->cpu_model = "440EP"; } - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to initialize CPU!\n"); exit(1); diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index 998ee2d16b..45b5f62d6e 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -506,7 +506,6 @@ static int PPC_NVRAM_set_params (Nvram *nvram, uint16_t NVRAM_size, static void ppc_prep_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -536,10 +535,10 @@ static void ppc_prep_init(MachineState *machine) linux_boot = (kernel_filename != NULL); /* init CPUs */ - if (cpu_model == NULL) - cpu_model = "602"; + if (machine->cpu_model == NULL) + machine->cpu_model = "602"; for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index f174e5a0f3..a6f19473cf 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -34,6 +34,7 @@ #include "sysemu/cpus.h" #include "sysemu/kvm.h" #include "kvm_ppc.h" +#include "migration/migration.h" #include "mmu-hash64.h" #include "qom/cpu.h" @@ -90,25 +91,6 @@ #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) -typedef struct sPAPRMachineState sPAPRMachineState; - -#define TYPE_SPAPR_MACHINE "spapr-machine" -#define SPAPR_MACHINE(obj) \ - OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE) - -/** - * sPAPRMachineState: - */ -struct sPAPRMachineState { - /*< private >*/ - MachineState parent_obj; - - /*< public >*/ - char *kvm_type; -}; - -sPAPREnvironment *spapr; - static XICSState *try_create_xics(const char *type, int nr_servers, int nr_irqs, Error **errp) { @@ -184,7 +166,28 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, return ret; } -static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) +static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs) +{ + int ret = 0; + PowerPCCPU *cpu = POWERPC_CPU(cs); + int index = ppc_get_vcpu_dt_id(cpu); + uint32_t associativity[] = {cpu_to_be32(0x5), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(cs->numa_node), + cpu_to_be32(index)}; + + /* Advertise NUMA via ibm,associativity */ + if (nb_numa_nodes > 1) { + ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity, + sizeof(associativity)); + } + + return ret; +} + +static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr) { int ret = 0, offset, cpus_offset; CPUState *cs; @@ -196,12 +199,6 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) PowerPCCPU *cpu = POWERPC_CPU(cs); DeviceClass *dc = DEVICE_GET_CLASS(cs); int index = ppc_get_vcpu_dt_id(cpu); - uint32_t associativity[] = {cpu_to_be32(0x5), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(cs->numa_node), - cpu_to_be32(index)}; if ((index % smt) != 0) { continue; @@ -225,20 +222,17 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) } } - if (nb_numa_nodes > 1) { - ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity, - sizeof(associativity)); - if (ret < 0) { - return ret; - } - } - ret = fdt_setprop(fdt, offset, "ibm,pft-size", pft_size_prop, sizeof(pft_size_prop)); if (ret < 0) { return ret; } + ret = spapr_fixup_cpu_numa_dt(fdt, offset, cs); + if (ret < 0) { + return ret; + } + ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu, ppc_get_compat_smt_threads(cpu)); if (ret < 0) { @@ -284,15 +278,18 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop, static hwaddr spapr_node0_size(void) { + MachineState *machine = MACHINE(qdev_get_machine()); + if (nb_numa_nodes) { int i; for (i = 0; i < nb_numa_nodes; ++i) { if (numa_info[i].node_mem) { - return MIN(pow2floor(numa_info[i].node_mem), ram_size); + return MIN(pow2floor(numa_info[i].node_mem), + machine->ram_size); } } } - return ram_size; + return machine->ram_size; } #define _FDT(exp) \ @@ -318,18 +315,13 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, uint32_t epow_irq) { void *fdt; - CPUState *cs; uint32_t start_prop = cpu_to_be32(initrd_base); uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); GString *hypertas = g_string_sized_new(256); GString *qemu_hypertas = g_string_sized_new(256); uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; - uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)}; - int smt = kvmppc_smt_threads(); + uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)}; unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; - QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); - unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; - uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; char *buf; add_str(hypertas, "hcall-pft"); @@ -415,107 +407,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_end_node(fdt))); - /* cpus */ - _FDT((fdt_begin_node(fdt, "cpus"))); - - _FDT((fdt_property_cell(fdt, "#address-cells", 0x1))); - _FDT((fdt_property_cell(fdt, "#size-cells", 0x0))); - - CPU_FOREACH(cs) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - DeviceClass *dc = DEVICE_GET_CLASS(cs); - PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); - int index = ppc_get_vcpu_dt_id(cpu); - char *nodename; - uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), - 0xffffffff, 0xffffffff}; - uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; - uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; - uint32_t page_sizes_prop[64]; - size_t page_sizes_prop_size; - - if ((index % smt) != 0) { - continue; - } - - nodename = g_strdup_printf("%s@%x", dc->fw_name, index); - - _FDT((fdt_begin_node(fdt, nodename))); - - g_free(nodename); - - _FDT((fdt_property_cell(fdt, "reg", index))); - _FDT((fdt_property_string(fdt, "device_type", "cpu"))); - - _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR]))); - _FDT((fdt_property_cell(fdt, "d-cache-block-size", - env->dcache_line_size))); - _FDT((fdt_property_cell(fdt, "d-cache-line-size", - env->dcache_line_size))); - _FDT((fdt_property_cell(fdt, "i-cache-block-size", - env->icache_line_size))); - _FDT((fdt_property_cell(fdt, "i-cache-line-size", - env->icache_line_size))); - - if (pcc->l1_dcache_size) { - _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size))); - } else { - fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); - } - if (pcc->l1_icache_size) { - _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size))); - } else { - fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); - } - - _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq))); - _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq))); - _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr))); - _FDT((fdt_property_string(fdt, "status", "okay"))); - _FDT((fdt_property(fdt, "64-bit", NULL, 0))); - - if (env->spr_cb[SPR_PURR].oea_read) { - _FDT((fdt_property(fdt, "ibm,purr", NULL, 0))); - } - - if (env->mmu_model & POWERPC_MMU_1TSEG) { - _FDT((fdt_property(fdt, "ibm,processor-segment-sizes", - segs, sizeof(segs)))); - } - - /* Advertise VMX/VSX (vector extensions) if available - * 0 / no property == no vector extensions - * 1 == VMX / Altivec available - * 2 == VSX available */ - if (env->insns_flags & PPC_ALTIVEC) { - uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; - - _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx))); - } - - /* Advertise DFP (Decimal Floating Point) if available - * 0 / no property == no DFP - * 1 == DFP available */ - if (env->insns_flags2 & PPC2_DFP) { - _FDT((fdt_property_cell(fdt, "ibm,dfp", 1))); - } - - page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, - sizeof(page_sizes_prop)); - if (page_sizes_prop_size) { - _FDT((fdt_property(fdt, "ibm,segment-page-sizes", - page_sizes_prop, page_sizes_prop_size))); - } - - _FDT((fdt_property_cell(fdt, "ibm,chip-id", - cs->cpu_index / cpus_per_socket))); - - _FDT((fdt_end_node(fdt))); - } - - _FDT((fdt_end_node(fdt))); - /* RTAS */ _FDT((fdt_begin_node(fdt, "rtas"))); @@ -604,7 +495,8 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, return fdt; } -int spapr_h_cas_compose_response(target_ulong addr, target_ulong size) +int spapr_h_cas_compose_response(sPAPRMachineState *spapr, + target_ulong addr, target_ulong size) { void *fdt, *fdt_skel; sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; @@ -665,8 +557,9 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, sizeof(associativity)))); } -static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) +static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt) { + MachineState *machine = MACHINE(spapr); hwaddr mem_start, node_size; int i, nb_nodes = nb_numa_nodes; NodeInfo *nodes = numa_info; @@ -675,7 +568,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) /* No NUMA nodes, assume there is just one node with whole RAM */ if (!nb_numa_nodes) { nb_nodes = 1; - ramnode.node_mem = ram_size; + ramnode.node_mem = machine->ram_size; nodes = &ramnode; } @@ -683,12 +576,12 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) if (!nodes[i].node_mem) { continue; } - if (mem_start >= ram_size) { + if (mem_start >= machine->ram_size) { node_size = 0; } else { node_size = nodes[i].node_mem; - if (node_size > ram_size - mem_start) { - node_size = ram_size - mem_start; + if (node_size > machine->ram_size - mem_start) { + node_size = machine->ram_size - mem_start; } } if (!mem_start) { @@ -714,7 +607,138 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) return 0; } -static void spapr_finalize_fdt(sPAPREnvironment *spapr, +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, + sPAPRMachineState *spapr) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); + int index = ppc_get_vcpu_dt_id(cpu); + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), + 0xffffffff, 0xffffffff}; + uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; + uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; + uint32_t page_sizes_prop[64]; + size_t page_sizes_prop_size; + QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); + unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; + uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; + uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; + + _FDT((fdt_setprop_cell(fdt, offset, "reg", index))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); + + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", + env->icache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", + env->icache_line_size))); + + if (pcc->l1_dcache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", + pcc->l1_dcache_size))); + } else { + fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); + } + if (pcc->l1_icache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", + pcc->l1_icache_size))); + } else { + fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); + } + + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); + + if (env->spr_cb[SPR_PURR].oea_read) { + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); + } + + if (env->mmu_model & POWERPC_MMU_1TSEG) { + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", + segs, sizeof(segs)))); + } + + /* Advertise VMX/VSX (vector extensions) if available + * 0 / no property == no vector extensions + * 1 == VMX / Altivec available + * 2 == VSX available */ + if (env->insns_flags & PPC_ALTIVEC) { + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); + } + + /* Advertise DFP (Decimal Floating Point) if available + * 0 / no property == no DFP + * 1 == DFP available */ + if (env->insns_flags2 & PPC2_DFP) { + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); + } + + page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, + sizeof(page_sizes_prop)); + if (page_sizes_prop_size) { + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", + page_sizes_prop, page_sizes_prop_size))); + } + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", + cs->cpu_index / cpus_per_socket))); + + _FDT((fdt_setprop(fdt, offset, "ibm,pft-size", + pft_size_prop, sizeof(pft_size_prop)))); + + _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs)); + + _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, + ppc_get_compat_smt_threads(cpu))); +} + +static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr) +{ + CPUState *cs; + int cpus_offset; + char *nodename; + int smt = kvmppc_smt_threads(); + + cpus_offset = fdt_add_subnode(fdt, 0, "cpus"); + _FDT(cpus_offset); + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0))); + + /* + * We walk the CPUs in reverse order to ensure that CPU DT nodes + * created by fdt_add_subnode() end up in the right order in FDT + * for the guest kernel the enumerate the CPUs correctly. + */ + CPU_FOREACH_REVERSE(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + int index = ppc_get_vcpu_dt_id(cpu); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + int offset; + + if ((index % smt) != 0) { + continue; + } + + nodename = g_strdup_printf("%s@%x", dc->fw_name, index); + offset = fdt_add_subnode(fdt, cpus_offset, nodename); + g_free(nodename); + _FDT(offset); + spapr_populate_cpu_dt(cs, fdt, offset, spapr); + } + +} + +static void spapr_finalize_fdt(sPAPRMachineState *spapr, hwaddr fdt_addr, hwaddr rtas_addr, hwaddr rtas_size) @@ -759,11 +783,8 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, fprintf(stderr, "Couldn't set up RTAS device tree properties\n"); } - /* Advertise NUMA via ibm,associativity */ - ret = spapr_fixup_cpu_dt(fdt, spapr); - if (ret < 0) { - fprintf(stderr, "Couldn't finalize CPU device tree properties\n"); - } + /* cpus */ + spapr_populate_cpus_dt_node(fdt, spapr); bootlist = get_boot_devices_list(&cb, true); if (cb && bootlist) { @@ -830,7 +851,7 @@ static void emulate_spapr_hypercall(PowerPCCPU *cpu) #define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY)) #define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY)) -static void spapr_reset_htab(sPAPREnvironment *spapr) +static void spapr_reset_htab(sPAPRMachineState *spapr) { long shift; int index; @@ -892,7 +913,7 @@ static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque) * A guest reset will cause spapr->htab_fd to become stale if being used. * Reopen the file descriptor to make sure the whole HTAB is properly read. */ -static int spapr_check_htab_fd(sPAPREnvironment *spapr) +static int spapr_check_htab_fd(sPAPRMachineState *spapr) { int rc = 0; @@ -912,6 +933,7 @@ static int spapr_check_htab_fd(sPAPREnvironment *spapr) static void ppc_spapr_reset(void) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); PowerPCCPU *first_ppc_cpu; uint32_t rtas_limit; @@ -945,12 +967,13 @@ static void ppc_spapr_reset(void) first_ppc_cpu->env.gpr[3] = spapr->fdt_addr; first_ppc_cpu->env.gpr[5] = 0; first_cpu->halted = 0; - first_ppc_cpu->env.nip = spapr->entry_point; + first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT; } static void spapr_cpu_reset(void *opaque) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); PowerPCCPU *cpu = opaque; CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; @@ -979,12 +1002,12 @@ static void spapr_cpu_reset(void *opaque) * We have 8 hpte per group, and each hpte is 16 bytes. * ie have 128 bytes per hpte entry. */ - env->htab_mask = (1ULL << ((spapr)->htab_shift - 7)) - 1; + env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1; env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab | (spapr->htab_shift - 18); } -static void spapr_create_nvram(sPAPREnvironment *spapr) +static void spapr_create_nvram(sPAPRMachineState *spapr) { DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram"); DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0); @@ -998,7 +1021,7 @@ static void spapr_create_nvram(sPAPREnvironment *spapr) spapr->nvram = (struct sPAPRNVRAM *)dev; } -static void spapr_rtc_create(sPAPREnvironment *spapr) +static void spapr_rtc_create(sPAPRMachineState *spapr) { DeviceState *dev = qdev_create(NULL, TYPE_SPAPR_RTC); @@ -1028,7 +1051,7 @@ static int spapr_vga_init(PCIBus *pci_bus) static int spapr_post_load(void *opaque, int version_id) { - sPAPREnvironment *spapr = (sPAPREnvironment *)opaque; + sPAPRMachineState *spapr = (sPAPRMachineState *)opaque; int err = 0; /* In earlier versions, there was no separate qdev for the PAPR @@ -1057,16 +1080,16 @@ static const VMStateDescription vmstate_spapr = { VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4), /* RTC offset */ - VMSTATE_UINT64_TEST(rtc_offset, sPAPREnvironment, version_before_3), + VMSTATE_UINT64_TEST(rtc_offset, sPAPRMachineState, version_before_3), - VMSTATE_PPC_TIMEBASE_V(tb, sPAPREnvironment, 2), + VMSTATE_PPC_TIMEBASE_V(tb, sPAPRMachineState, 2), VMSTATE_END_OF_LIST() }, }; static int htab_save_setup(QEMUFile *f, void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; /* "Iteration" header */ qemu_put_be32(f, spapr->htab_shift); @@ -1090,7 +1113,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque) return 0; } -static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr, +static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr, int64_t max_ns) { int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; @@ -1140,7 +1163,7 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr, spapr->htab_save_index = index; } -static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr, +static int htab_save_later_pass(QEMUFile *f, sPAPRMachineState *spapr, int64_t max_ns) { bool final = max_ns < 0; @@ -1222,7 +1245,7 @@ static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr, static int htab_save_iterate(QEMUFile *f, void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; int rc = 0; /* Iteration header */ @@ -1257,7 +1280,7 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) static int htab_save_complete(QEMUFile *f, void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; /* Iteration header */ qemu_put_be32(f, 0); @@ -1292,7 +1315,7 @@ static int htab_save_complete(QEMUFile *f, void *opaque) static int htab_load(QEMUFile *f, void *opaque, int version_id) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; uint32_t section_hdr; int fd = -1; @@ -1386,16 +1409,42 @@ static void spapr_boot_set(void *opaque, const char *boot_device, machine->boot_order = g_strdup(boot_device); } +static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, TIMEBASE_FREQ); + + /* PAPR always has exception vectors in RAM not ROM. To ensure this, + * MSR[IP] should never be set. + */ + env->msr_mask &= ~(1 << 6); + + /* Tell KVM that we're in PAPR mode */ + if (kvm_enabled()) { + kvmppc_set_papr(cpu); + } + + if (cpu->max_compat) { + if (ppc_set_compat(cpu, cpu->max_compat) < 0) { + exit(1); + } + } + + xics_cpu_setup(spapr->icp, cpu); + + qemu_register_reset(spapr_cpu_reset, cpu); +} + /* pSeries LPAR / sPAPR hardware init */ static void ppc_spapr_init(MachineState *machine) { - ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; + sPAPRMachineState *spapr = SPAPR_MACHINE(machine); const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; PowerPCCPU *cpu; - CPUPPCState *env; PCIHostState *phb; int i; MemoryRegion *sysmem = get_system_memory(); @@ -1412,7 +1461,6 @@ static void ppc_spapr_init(MachineState *machine) msi_supported = true; - spapr = g_malloc0(sizeof(*spapr)); QLIST_INIT(&spapr->phbs); cpu_ppc_hypercall = emulate_spapr_hypercall; @@ -1459,7 +1507,7 @@ static void ppc_spapr_init(MachineState *machine) * more than needed for the Linux guests we support. */ spapr->htab_shift = 18; /* Minimum architected size */ while (spapr->htab_shift <= 46) { - if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) { + if ((1ULL << (spapr->htab_shift + 7)) >= machine->ram_size) { break; } spapr->htab_shift++; @@ -1467,43 +1515,21 @@ static void ppc_spapr_init(MachineState *machine) /* Set up Interrupt Controller before we create the VCPUs */ spapr->icp = xics_system_init(machine, - smp_cpus * kvmppc_smt_threads() / smp_threads, + DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(), + smp_threads), XICS_IRQS); /* init CPUs */ - if (cpu_model == NULL) { - cpu_model = kvm_enabled() ? "host" : "POWER7"; + if (machine->cpu_model == NULL) { + machine->cpu_model = kvm_enabled() ? "host" : "POWER7"; } for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); } - env = &cpu->env; - - /* Set time-base frequency to 512 MHz */ - cpu_ppc_tb_init(env, TIMEBASE_FREQ); - - /* PAPR always has exception vectors in RAM not ROM. To ensure this, - * MSR[IP] should never be set. - */ - env->msr_mask &= ~(1 << 6); - - /* Tell KVM that we're in PAPR mode */ - if (kvm_enabled()) { - kvmppc_set_papr(cpu); - } - - if (cpu->max_compat) { - if (ppc_set_compat(cpu, cpu->max_compat) < 0) { - exit(1); - } - } - - xics_cpu_setup(spapr->icp, cpu); - - qemu_register_reset(spapr_cpu_reset, cpu); + spapr_cpu_init(spapr, cpu); } if (kvm_enabled()) { @@ -1512,9 +1538,8 @@ static void ppc_spapr_init(MachineState *machine) } /* allocate RAM */ - spapr->ram_limit = ram_size; memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram", - spapr->ram_limit); + machine->ram_size); memory_region_add_subregion(sysmem, 0, ram); if (rma_alloc_size && rma) { @@ -1658,8 +1683,9 @@ static void ppc_spapr_init(MachineState *machine) } g_free(filename); - spapr->entry_point = 0x100; - + /* FIXME: Should register things through the MachineState's qdev + * interface, this is a legacy from the sPAPREnvironment structure + * which predated MachineState but had a similar function */ vmstate_register(NULL, 0, &vmstate_spapr, spapr); register_savevm_live(NULL, "spapr/htab", -1, 1, &savevm_htab_handlers, spapr); @@ -1755,17 +1781,17 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, static char *spapr_get_kvm_type(Object *obj, Error **errp) { - sPAPRMachineState *sm = SPAPR_MACHINE(obj); + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); - return g_strdup(sm->kvm_type); + return g_strdup(spapr->kvm_type); } static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp) { - sPAPRMachineState *sm = SPAPR_MACHINE(obj); + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); - g_free(sm->kvm_type); - sm->kvm_type = g_strdup(value); + g_free(spapr->kvm_type); + spapr->kvm_type = g_strdup(value); } static void spapr_machine_initfn(Object *obj) @@ -1820,6 +1846,7 @@ static const TypeInfo spapr_machine_info = { .abstract = true, .instance_size = sizeof(sPAPRMachineState), .instance_init = spapr_machine_initfn, + .class_size = sizeof(sPAPRMachineClass), .class_init = spapr_machine_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_FW_PATH_PROVIDER }, @@ -1851,6 +1878,8 @@ static const TypeInfo spapr_machine_info = { static void spapr_compat_2_3(Object *obj) { + savevm_skip_section_footers(); + global_state_set_optional(); } static void spapr_compat_2_2(Object *obj) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index fda9e3590a..f626eb7b34 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -238,6 +238,7 @@ void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) static void rtas_event_log_queue(int log_type, void *data, bool exception) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1); g_assert(data); @@ -250,6 +251,7 @@ static void rtas_event_log_queue(int log_type, void *data, bool exception) static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, bool exception) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; /* we only queue EPOW events atm. */ @@ -278,6 +280,7 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, static bool rtas_event_log_contains(uint32_t event_mask, bool exception) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; /* we only queue EPOW events atm. */ @@ -314,6 +317,7 @@ static void spapr_init_v6hdr(struct rtas_event_log_v6 *v6hdr) static void spapr_init_maina(struct rtas_event_log_v6_maina *maina, int section_count) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct tm tm; int year; @@ -336,7 +340,7 @@ static void spapr_init_maina(struct rtas_event_log_v6_maina *maina, static void spapr_powerdown_req(Notifier *n, void *opaque) { - sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier); + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct rtas_error_log *hdr; struct rtas_event_log_v6 *v6hdr; struct rtas_event_log_v6_maina *maina; @@ -384,6 +388,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct hp_log_full *new_hp; struct rtas_error_log *hdr; struct rtas_event_log_v6 *v6hdr; @@ -453,7 +458,7 @@ void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc) spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_REMOVE); } -static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -508,7 +513,7 @@ out_no_events: rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } -static void event_scan(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void event_scan(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -548,7 +553,7 @@ out_no_events: rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } -void spapr_events_init(sPAPREnvironment *spapr) +void spapr_events_init(sPAPRMachineState *spapr) { QTAILQ_INIT(&spapr->pending_events); spapr->check_exception_irq = xics_alloc(spapr->icp, 0, 0, false); diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 4f76f1cbfe..652ddf6e37 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -84,9 +84,10 @@ static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index) return true; } -static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { + MachineState *machine = MACHINE(spapr); CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; target_ulong pte_index = args[1]; @@ -118,7 +119,7 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr, raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << page_shift) - 1); - if (raddr < spapr->ram_limit) { + if (raddr < machine->ram_size) { /* Regular RAM - should have WIMG=0010 */ if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { return H_PARAMETER; @@ -205,7 +206,7 @@ static RemoveResult remove_hpte(CPUPPCState *env, target_ulong ptex, return REMOVE_SUCCESS; } -static target_ulong h_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -252,7 +253,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, #define H_BULK_REMOVE_MAX_BATCH 4 -static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -299,7 +300,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -337,7 +338,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_read(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -367,7 +368,7 @@ static target_ulong h_read(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { /* FIXME: actually implement this */ @@ -506,7 +507,7 @@ static target_ulong deregister_dtl(CPUPPCState *env, target_ulong addr) return H_SUCCESS; } -static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong flags = args[0]; @@ -551,7 +552,7 @@ static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPREnvironment *spapr, return ret; } -static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_cede(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -567,7 +568,7 @@ static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_rtas(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_rtas(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong rtas_r3 = args[0]; @@ -579,7 +580,7 @@ static target_ulong h_rtas(PowerPCCPU *cpu, sPAPREnvironment *spapr, nret, rtas_r3 + 12 + 4*nargs); } -static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -603,7 +604,7 @@ static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_PARAMETER; } -static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -629,7 +630,7 @@ static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_PARAMETER; } -static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -698,14 +699,14 @@ static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { /* Nothing to do on emulation, KVM will trap this in the kernel */ return H_SUCCESS; } -static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { /* Nothing to do on emulation, KVM will trap this in the kernel */ @@ -788,7 +789,7 @@ static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu, return H_SUCCESS; } -static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong resource = args[1]; @@ -828,7 +829,7 @@ static void do_set_compat(void *arg) ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0) static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { @@ -921,7 +922,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, return H_SUCCESS; } - if (spapr_h_cas_compose_response(args[1], args[2])) { + if (spapr_h_cas_compose_response(spapr, args[1], args[2])) { qemu_system_reset_request(); } @@ -952,6 +953,8 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn) target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + if ((opcode <= MAX_HCALL_OPCODE) && ((opcode & 0x3) == 0)) { spapr_hcall_fn fn = papr_hypercall_table[opcode / 4]; diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 8cd9dba9ac..f61504e0c5 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -60,6 +60,20 @@ sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn) return NULL; } +static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce) +{ + switch (tce & SPAPR_TCE_RW) { + case SPAPR_TCE_FAULT: + return IOMMU_NONE; + case SPAPR_TCE_RO: + return IOMMU_RO; + case SPAPR_TCE_WO: + return IOMMU_WO; + default: /* SPAPR_TCE_RW */ + return IOMMU_RW; + } +} + /* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) @@ -82,7 +96,7 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, ret.iova = addr & page_mask; ret.translated_addr = tce & page_mask; ret.addr_mask = ~page_mask; - ret.perm = tce & IOMMU_RW; + ret.perm = spapr_tce_iommu_access_flags(tce); } trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm, ret.addr_mask); @@ -233,14 +247,14 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, entry.iova = ioba & page_mask; entry.translated_addr = tce & page_mask; entry.addr_mask = ~page_mask; - entry.perm = tce & IOMMU_RW; + entry.perm = spapr_tce_iommu_access_flags(tce); memory_region_notify_iommu(&tcet->iommu, entry); return H_SUCCESS; } static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { int i; @@ -267,9 +281,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, ioba &= page_mask; for (i = 0; i < npages; ++i, ioba += page_size) { - target_ulong off = (tce_list & ~SPAPR_TCE_RW) + - i * sizeof(target_ulong); - tce = ldq_be_phys(cs->as, off); + tce = ldq_be_phys(cs->as, tce_list + i * sizeof(target_ulong)); ret = put_tce_emu(tcet, ioba, tce); if (ret) { @@ -287,7 +299,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, return ret; } -static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { int i; @@ -326,7 +338,7 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, return ret; } -static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong liobn = args[0]; @@ -367,7 +379,7 @@ static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, return H_SUCCESS; } -static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong liobn = args[0]; diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index d4a6150527..a8f79d8003 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -23,6 +23,7 @@ * THE SOFTWARE. */ #include "hw/hw.h" +#include "hw/sysbus.h" #include "hw/pci/pci.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" @@ -35,6 +36,7 @@ #include "qemu/error-report.h" #include "qapi/qmp/qerror.h" +#include "hw/pci/pci_bridge.h" #include "hw/pci/pci_bus.h" #include "hw/ppc/spapr_drc.h" #include "sysemu/device_tree.h" @@ -50,6 +52,8 @@ #define RTAS_TYPE_MSI 1 #define RTAS_TYPE_MSIX 2 +#define FDT_NAME_MAX 128 + #define _FDT(exp) \ do { \ int ret = (exp); \ @@ -58,7 +62,7 @@ } \ } while (0) -sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid) +sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid) { sPAPRPHBState *sphb; @@ -72,7 +76,7 @@ sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid) return NULL; } -PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid, +PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, uint32_t config_addr) { sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid); @@ -93,7 +97,7 @@ static uint32_t rtas_pci_cfgaddr(uint32_t arg) return ((arg >> 20) & 0xf00) | (arg & 0xff); } -static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid, +static void finish_read_pci_config(sPAPRMachineState *spapr, uint64_t buid, uint32_t addr, uint32_t size, target_ulong rets) { @@ -123,7 +127,7 @@ static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid, rtas_st(rets, 1, val); } -static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -143,7 +147,7 @@ static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, finish_read_pci_config(spapr, buid, addr, size, rets); } -static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -161,7 +165,7 @@ static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, finish_read_pci_config(spapr, 0, addr, size, rets); } -static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid, +static void finish_write_pci_config(sPAPRMachineState *spapr, uint64_t buid, uint32_t addr, uint32_t size, uint32_t val, target_ulong rets) { @@ -189,7 +193,7 @@ static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -210,7 +214,7 @@ static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, finish_write_pci_config(spapr, buid, addr, size, val, rets); } -static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -259,7 +263,7 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix, } } -static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -377,7 +381,7 @@ out: } static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, @@ -418,13 +422,14 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, } static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { sPAPRPHBState *sphb; sPAPRPHBClass *spc; + PCIDevice *pdev; uint32_t addr, option; uint64_t buid; int ret; @@ -442,6 +447,12 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, goto param_error_exit; } + pdev = pci_find_device(PCI_HOST_BRIDGE(sphb)->bus, + (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); + if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + goto param_error_exit; + } + spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); if (!spc->eeh_set_option) { goto param_error_exit; @@ -456,7 +467,7 @@ param_error_exit: } static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -512,7 +523,7 @@ param_error_exit: } static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -556,7 +567,7 @@ param_error_exit: } static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -592,7 +603,7 @@ param_error_exit: } static void rtas_ibm_configure_pe(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -627,7 +638,7 @@ param_error_exit: /* To support it later */ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -718,6 +729,7 @@ static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin) static void spapr_msi_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); uint32_t irq = data; trace_spapr_pci_msi_write(addr, data, irq); @@ -742,6 +754,60 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &phb->iommu_as; } +static char *spapr_phb_vfio_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev) +{ + char *path = NULL, *buf = NULL, *host = NULL; + + /* Get the PCI VFIO host id */ + host = object_property_get_str(OBJECT(pdev), "host", NULL); + if (!host) { + goto err_out; + } + + /* Construct the path of the file that will give us the DT location */ + path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host); + g_free(host); + if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) { + goto err_out; + } + g_free(path); + + /* Construct and read from host device tree the loc-code */ + path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", buf); + g_free(buf); + if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) { + goto err_out; + } + return buf; + +err_out: + g_free(path); + return NULL; +} + +static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev) +{ + char *buf; + const char *devtype = "qemu"; + uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)))); + + if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + buf = spapr_phb_vfio_get_loc_code(sphb, pdev); + if (buf) { + return buf; + } + devtype = "vfio"; + } + /* + * For emulated devices and VFIO-failure case, make up + * the loc-code. + */ + buf = g_strdup_printf("%s_%s:%04x:%02x:%02x.%x", + devtype, pdev->name, sphb->index, busnr, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + return buf; +} + /* Macros to operate with address in OF binding to PCI */ #define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) #define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ @@ -786,7 +852,13 @@ typedef struct ResourceProps { * phys.hi = 0xYYXXXXZZ, where: * 0xYY = npt000ss * ||| | - * ||| +-- space code: 1 if IO region, 2 if MEM region + * ||| +-- space code + * ||| | + * ||| + 00 if configuration space + * ||| + 01 if IO region, + * ||| + 10 if 32-bit MEM region + * ||| + 11 if 64-bit MEM region + * ||| * ||+------ for non-relocatable IO: 1 if aliased * || for relocatable IO: 1 if below 64KB * || for MEM: 1 if below 1MB @@ -846,6 +918,8 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp) reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i))); if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { reg->phys_hi |= cpu_to_be32(b_ss(1)); + } else if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + reg->phys_hi |= cpu_to_be32(b_ss(3)); } else { reg->phys_hi |= cpu_to_be32(b_ss(2)); } @@ -870,13 +944,17 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp) rp->assigned_len = assigned_idx * sizeof(ResourceFields); } +static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, + PCIDevice *pdev); + static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, - int phb_index, int drc_index, - const char *drc_name) + sPAPRPHBState *sphb) { ResourceProps rp; bool is_bridge = false; - int pci_status; + int pci_status, err; + char *buf = NULL; + uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev); if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == PCI_HEADER_TYPE_BRIDGE) { @@ -891,8 +969,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, _FDT(fdt_setprop_cell(fdt, offset, "revision-id", pci_default_read_config(dev, PCI_REVISION_ID, 1))); _FDT(fdt_setprop_cell(fdt, offset, "class-code", - pci_default_read_config(dev, PCI_CLASS_DEVICE, 2) - << 8)); + pci_default_read_config(dev, PCI_CLASS_PROG, 3))); if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) { _FDT(fdt_setprop_cell(fdt, offset, "interrupts", pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1))); @@ -938,8 +1015,21 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, * processed by OF beforehand */ _FDT(fdt_setprop_string(fdt, offset, "name", "pci")); - _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name))); - _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)); + buf = spapr_phb_get_loc_code(sphb, dev); + if (!buf) { + error_report("Failed setting the ibm,loc-code"); + return -1; + } + + err = fdt_setprop_string(fdt, offset, "ibm,loc-code", buf); + g_free(buf); + if (err < 0) { + return err; + } + + if (drc_index) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)); + } _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", RESOURCE_CELLS_ADDRESS)); @@ -957,29 +1047,27 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, } /* create OF node for pci device and required OF DT properties */ -static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, - int drc_index, const char *drc_name, - int *dt_offset) +static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, + void *fdt, int node_offset) { - void *fdt; - int offset, ret, fdt_size; + int offset, ret; int slot = PCI_SLOT(dev->devfn); int func = PCI_FUNC(dev->devfn); - char nodename[512]; + char nodename[FDT_NAME_MAX]; - fdt = create_device_tree(&fdt_size); if (func != 0) { - sprintf(nodename, "pci@%d,%d", slot, func); + snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func); } else { - sprintf(nodename, "pci@%d", slot); + snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot); } - offset = fdt_add_subnode(fdt, 0, nodename); - ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index, - drc_name); - g_assert(!ret); + offset = fdt_add_subnode(fdt, node_offset, nodename); + ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb); - *dt_offset = offset; - return fdt; + g_assert(!ret); + if (ret) { + return 0; + } + return offset; } static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, @@ -989,22 +1077,21 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); DeviceState *dev = DEVICE(pdev); - int drc_index = drck->get_index(drc); - const char *drc_name = drck->get_name(drc); void *fdt = NULL; - int fdt_start_offset = 0; + int fdt_start_offset = 0, fdt_size; - /* boot-time devices get their device tree node created by SLOF, but for - * hotplugged devices we need QEMU to generate it so the guest can fetch - * it via RTAS - */ if (dev->hotplugged) { - fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name, - &fdt_start_offset); + fdt = create_device_tree(&fdt_size); + fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0); + if (!fdt_start_offset) { + error_setg(errp, "Failed to create pci child device tree node"); + goto out; + } } drck->attach(drc, DEVICE(pdev), fdt, fdt_start_offset, !dev->hotplugged, errp); +out: if (*errp) { g_free(fdt); } @@ -1046,6 +1133,20 @@ static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb, pdev->devfn); } +static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, + PCIDevice *pdev) +{ + sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); + sPAPRDRConnectorClass *drck; + + if (!drc) { + return 0; + } + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + return drck->get_index(drc); +} + static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp) { @@ -1110,6 +1211,7 @@ static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler, static void spapr_phb_realize(DeviceState *dev, Error **errp) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); SysBusDevice *s = SYS_BUS_DEVICE(dev); sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s); PCIHostState *phb = PCI_HOST_BRIDGE(s); @@ -1351,34 +1453,28 @@ static const VMStateDescription vmstate_spapr_pci_msi = { }, }; -static void spapr_pci_fill_msi_devs(gpointer key, gpointer value, - gpointer opaque) -{ - sPAPRPHBState *sphb = opaque; - - sphb->msi_devs[sphb->msi_devs_num].key = *(uint32_t *)key; - sphb->msi_devs[sphb->msi_devs_num].value = *(spapr_pci_msi *)value; - sphb->msi_devs_num++; -} - static void spapr_pci_pre_save(void *opaque) { sPAPRPHBState *sphb = opaque; - int msi_devs_num; + GHashTableIter iter; + gpointer key, value; + int i; if (sphb->msi_devs) { g_free(sphb->msi_devs); sphb->msi_devs = NULL; } - sphb->msi_devs_num = 0; - msi_devs_num = g_hash_table_size(sphb->msi); - if (!msi_devs_num) { + sphb->msi_devs_num = g_hash_table_size(sphb->msi); + if (!sphb->msi_devs_num) { return; } - sphb->msi_devs = g_malloc(msi_devs_num * sizeof(spapr_pci_msi_mig)); + sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig)); - g_hash_table_foreach(sphb->msi, spapr_pci_fill_msi_devs, sphb); - assert(sphb->msi_devs_num == msi_devs_num); + g_hash_table_iter_init(&iter, sphb->msi); + for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) { + sphb->msi_devs[i].key = *(uint32_t *) key; + sphb->msi_devs[i].value = *(spapr_pci_msi *) value; + } } static int spapr_pci_post_load(void *opaque, int version_id) @@ -1464,7 +1560,7 @@ static const TypeInfo spapr_phb_info = { } }; -PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) +PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index) { DeviceState *dev; @@ -1475,12 +1571,90 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) return PCI_HOST_BRIDGE(dev); } +typedef struct sPAPRFDT { + void *fdt; + int node_off; + sPAPRPHBState *sphb; +} sPAPRFDT; + +static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + PCIBus *sec_bus; + sPAPRFDT *p = opaque; + int offset; + sPAPRFDT s_fdt; + + offset = spapr_create_pci_child_dt(p->sphb, pdev, p->fdt, p->node_off); + if (!offset) { + error_report("Failed to create pci child device tree node"); + return; + } + + if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != + PCI_HEADER_TYPE_BRIDGE)) { + return; + } + + sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + if (!sec_bus) { + return; + } + + s_fdt.fdt = p->fdt; + s_fdt.node_off = offset; + s_fdt.sphb = p->sphb; + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + spapr_populate_pci_devices_dt, + &s_fdt); +} + +static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + unsigned int *bus_no = opaque; + unsigned int primary = *bus_no; + unsigned int subordinate = 0xff; + PCIBus *sec_bus = NULL; + + if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != + PCI_HEADER_TYPE_BRIDGE)) { + return; + } + + (*bus_no)++; + pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1); + pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1); + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); + + sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + if (!sec_bus) { + return; + } + + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1); + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + spapr_phb_pci_enumerate_bridge, bus_no); + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); +} + +static void spapr_phb_pci_enumerate(sPAPRPHBState *phb) +{ + PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; + unsigned int bus_no = 0; + + pci_for_each_device(bus, pci_bus_num(bus), + spapr_phb_pci_enumerate_bridge, + &bus_no); + +} + int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt) { int bus_off, i, j, ret; - char nodename[256]; + char nodename[FDT_NAME_MAX]; uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; const uint64_t mmiosize = memory_region_size(&phb->memwindow); const uint64_t w32max = (1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET; @@ -1514,9 +1688,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)}; uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7]; sPAPRTCETable *tcet; + PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; + sPAPRFDT s_fdt; /* Start populating the FDT */ - sprintf(nodename, "pci@%" PRIx64, phb->buid); + snprintf(nodename, FDT_NAME_MAX, "pci@%" PRIx64, phb->buid); bus_off = fdt_add_subnode(fdt, 0, nodename); if (bus_off < 0) { return bus_off; @@ -1563,6 +1739,18 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, tcet->liobn, tcet->bus_offset, tcet->nb_table << tcet->page_shift); + /* Walk the bridges and program the bus numbers*/ + spapr_phb_pci_enumerate(phb); + _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1)); + + /* Populate tree nodes with PCI devices attached */ + s_fdt.fdt = fdt; + s_fdt.node_off = bus_off; + s_fdt.sphb = phb; + pci_for_each_device(bus, pci_bus_num(bus), + spapr_populate_pci_devices_dt, + &s_fdt); + ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb), SPAPR_DR_CONNECTOR_TYPE_PCI); if (ret) { @@ -1631,6 +1819,7 @@ static int spapr_switch_one_vga(DeviceState *dev, void *opaque) void spapr_pci_switch_vga(bool big_endian) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPRPHBState *sphb; /* diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 99a1be5113..cca45ed312 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -19,6 +19,7 @@ #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" +#include "hw/pci/msix.h" #include "linux/vfio.h" #include "hw/vfio/vfio.h" @@ -71,9 +72,26 @@ static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) spapr_tce_get_iommu(tcet)); } +static void spapr_phb_vfio_eeh_reenable(sPAPRPHBVFIOState *svphb) +{ + struct vfio_eeh_pe_op op = { + .argsz = sizeof(op), + .op = VFIO_EEH_PE_ENABLE + }; + + vfio_container_ioctl(&svphb->phb.iommu_as, + svphb->iommugroupid, VFIO_EEH_PE_OP, &op); +} + static void spapr_phb_vfio_reset(DeviceState *qdev) { - /* Do nothing */ + /* + * The PE might be in frozen state. To reenable the EEH + * functionality on it will clean the frozen state, which + * ensures that the contained PCI devices will work properly + * after reboot. + */ + spapr_phb_vfio_eeh_reenable(SPAPR_PCI_VFIO_HOST_BRIDGE(qdev)); } static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, @@ -142,6 +160,49 @@ static int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state) return RTAS_OUT_SUCCESS; } +static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus, + PCIDevice *pdev, + void *opaque) +{ + /* Check if the device is VFIO PCI device */ + if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + return; + } + + /* + * The MSIx table will be cleaned out by reset. We need + * disable it so that it can be reenabled properly. Also, + * the cached MSIx table should be cleared as it's not + * reflecting the contents in hardware. + */ + if (msix_enabled(pdev)) { + uint16_t flags; + + flags = pci_host_config_read_common(pdev, + pdev->msix_cap + PCI_MSIX_FLAGS, + pci_config_size(pdev), 2); + flags &= ~PCI_MSIX_FLAGS_ENABLE; + pci_host_config_write_common(pdev, + pdev->msix_cap + PCI_MSIX_FLAGS, + pci_config_size(pdev), flags, 2); + } + + msix_reset(pdev); +} + +static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque) +{ + pci_for_each_device(bus, pci_bus_num(bus), + spapr_phb_vfio_eeh_clear_dev_msix, NULL); +} + +static void spapr_phb_vfio_eeh_pre_reset(sPAPRPHBState *sphb) +{ + PCIHostState *phb = PCI_HOST_BRIDGE(sphb); + + pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL); +} + static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) { sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); @@ -153,9 +214,11 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) op.op = VFIO_EEH_PE_RESET_DEACTIVATE; break; case RTAS_SLOT_RESET_HOT: + spapr_phb_vfio_eeh_pre_reset(sphb); op.op = VFIO_EEH_PE_RESET_HOT; break; case RTAS_SLOT_RESET_FUNDAMENTAL: + spapr_phb_vfio_eeh_pre_reset(sphb); op.op = VFIO_EEH_PE_RESET_FUNDAMENTAL; break; default: diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index fa28d43f81..2986f94f03 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -29,6 +29,7 @@ #include "sysemu/char.h" #include "hw/qdev.h" #include "sysemu/device_tree.h" +#include "sysemu/cpus.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" @@ -47,7 +48,7 @@ do { } while (0) #endif -static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPREnvironment *spapr, +static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPRMachineState *spapr, uint32_t drc_index) { sPAPRConfigureConnectorState *ccs = NULL; @@ -61,14 +62,14 @@ static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPREnvironment *spapr, return ccs; } -static void spapr_ccs_add(sPAPREnvironment *spapr, +static void spapr_ccs_add(sPAPRMachineState *spapr, sPAPRConfigureConnectorState *ccs) { g_assert(!spapr_ccs_find(spapr, ccs->drc_index)); QTAILQ_INSERT_HEAD(&spapr->ccs_list, ccs, next); } -static void spapr_ccs_remove(sPAPREnvironment *spapr, +static void spapr_ccs_remove(sPAPRMachineState *spapr, sPAPRConfigureConnectorState *ccs) { QTAILQ_REMOVE(&spapr->ccs_list, ccs, next); @@ -77,7 +78,7 @@ static void spapr_ccs_remove(sPAPREnvironment *spapr, void spapr_ccs_reset_hook(void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; sPAPRConfigureConnectorState *ccs, *ccs_tmp; QTAILQ_FOREACH_SAFE(ccs, &spapr->ccs_list, next, ccs_tmp) { @@ -85,7 +86,7 @@ void spapr_ccs_reset_hook(void *opaque) } } -static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_display_character(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -101,7 +102,7 @@ static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr, } } -static void rtas_power_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_power_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { @@ -113,7 +114,7 @@ static void rtas_power_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_system_reboot(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_system_reboot(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -127,7 +128,7 @@ static void rtas_system_reboot(PowerPCCPU *cpu, sPAPREnvironment *spapr, } static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -157,7 +158,7 @@ static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); } -static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPREnvironment *spapr, +static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -204,7 +205,7 @@ static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); } -static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_stop_self(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -227,7 +228,7 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr, } static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -262,7 +263,7 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, } static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -282,7 +283,7 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, } static void rtas_ibm_os_term(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -294,7 +295,7 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu, rtas_st(rets, 0, ret); } -static void rtas_set_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_power_level(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -319,7 +320,7 @@ static void rtas_set_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 1, 100); } -static void rtas_get_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_power_level(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -356,7 +357,7 @@ static bool sensor_type_is_dr(uint32_t sensor_type) return false; } -static void rtas_set_indicator(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -427,7 +428,7 @@ out_unimplemented: rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); } -static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -481,7 +482,7 @@ static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr, #define CC_WA_LEN 4096 static void rtas_ibm_configure_connector(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -601,7 +602,7 @@ static struct rtas_call { spapr_rtas_fn fn; } rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE]; -target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr, +target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { @@ -651,6 +652,8 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, { int ret; int i; + uint32_t lrdr_capacity[5]; + MachineState *machine = MACHINE(qdev_get_machine()); ret = fdt_add_mem_rsv(fdt, rtas_addr, rtas_size); if (ret < 0) { @@ -699,6 +702,19 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, } } + + lrdr_capacity[0] = cpu_to_be32(((uint64_t)machine->maxram_size) >> 32); + lrdr_capacity[1] = cpu_to_be32(machine->maxram_size & 0xffffffff); + lrdr_capacity[2] = 0; + lrdr_capacity[3] = cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE); + lrdr_capacity[4] = cpu_to_be32(max_cpus/smp_threads); + ret = qemu_fdt_setprop(fdt, "/rtas", "ibm,lrdr-capacity", lrdr_capacity, + sizeof(lrdr_capacity)); + if (ret < 0) { + fprintf(stderr, "Couldn't add ibm,lrdr-capacity rtas property\n"); + return ret; + } + return 0; } diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c index 9da3746e7c..d20b8f270c 100644 --- a/hw/ppc/spapr_rtc.c +++ b/hw/ppc/spapr_rtc.c @@ -76,7 +76,7 @@ int spapr_rtc_import_offset(DeviceState *dev, int64_t legacy_offset) return 0; } -static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -106,7 +106,7 @@ static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 7, ns); } -static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 8b59b64b7e..c51eb8e244 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -160,7 +160,7 @@ static int vio_make_devnode(VIOsPAPRDevice *dev, /* * CRQ handling */ -static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -218,7 +218,7 @@ static target_ulong free_crq(VIOsPAPRDevice *dev) return H_SUCCESS; } -static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -232,7 +232,7 @@ static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, return free_crq(dev); } -static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -255,7 +255,7 @@ static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_HARDWARE; } -static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -333,7 +333,7 @@ void spapr_vio_set_bypass(VIOsPAPRDevice *dev, bool bypass) dev->tcet->bypass = bypass; } -static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -364,7 +364,7 @@ static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_quiesce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_quiesce(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -426,6 +426,7 @@ static void spapr_vio_busdev_reset(DeviceState *qdev) static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev; VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); char *id; @@ -491,7 +492,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) pc->realize(dev, errp); } -static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c index 439732f7ab..de86f7c64c 100644 --- a/hw/ppc/virtex_ml507.c +++ b/hw/ppc/virtex_ml507.c @@ -197,7 +197,6 @@ static int xilinx_load_device_tree(hwaddr addr, static void virtex_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; hwaddr initrd_base = 0; @@ -214,11 +213,11 @@ static void virtex_init(MachineState *machine) int i; /* init CPUs */ - if (cpu_model == NULL) { - cpu_model = "440-Xilinx"; + if (machine->cpu_model == NULL) { + machine->cpu_model = "440-Xilinx"; } - cpu = ppc440_init_xilinx(&ram_size, 1, cpu_model, 400000000); + cpu = ppc440_init_xilinx(&ram_size, 1, machine->cpu_model, 400000000); env = &cpu->env; qemu_register_reset(main_cpu_reset, cpu); diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index d631337e11..e345a6e9b8 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -1316,8 +1316,8 @@ static int virtio_ccw_add_irqfd(VirtioCcwDevice *dev, int n) VirtQueue *vq = virtio_get_queue(vdev, n); EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); - return kvm_irqchip_add_irqfd_notifier(kvm_state, notifier, NULL, - dev->routes.gsi[n]); + return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, notifier, NULL, + dev->routes.gsi[n]); } static void virtio_ccw_remove_irqfd(VirtioCcwDevice *dev, int n) @@ -1327,8 +1327,8 @@ static void virtio_ccw_remove_irqfd(VirtioCcwDevice *dev, int n) EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); int ret; - ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, notifier, - dev->routes.gsi[n]); + ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, notifier, + dev->routes.gsi[n]); assert(ret == 0); } diff --git a/hw/vfio/common.c b/hw/vfio/common.c index b1045da857..85ee9b005e 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -772,11 +772,19 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; + VFIOGuestIOMMU *giommu, *tmp; if (container->iommu_data.release) { container->iommu_data.release(container); } QLIST_REMOVE(container, next); + + QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { + memory_region_unregister_iommu_notifier(&giommu->n); + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } + trace_vfio_disconnect_container(container->fd); close(container->fd); g_free(container); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index e0e339a534..2ed877fe9f 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -597,7 +597,7 @@ static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg, return; } - if (kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->kvm_interrupt, + if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, NULL, virq) < 0) { kvm_irqchip_release_virq(kvm_state, virq); event_notifier_cleanup(&vector->kvm_interrupt); @@ -609,8 +609,8 @@ static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg, static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector) { - kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->kvm_interrupt, - vector->virq); + kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, + vector->virq); kvm_irqchip_release_virq(kvm_state, vector->virq); vector->virq = -1; event_notifier_cleanup(&vector->kvm_interrupt); @@ -939,7 +939,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) }; uint64_t size; off_t off = 0; - size_t bytes; + ssize_t bytes; if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info)) { error_report("vfio: Error getting ROM info: %m"); @@ -2252,6 +2252,33 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev) vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; + /* + * Test the size of the pba_offset variable and catch if it extends outside + * of the specified BAR. If it is the case, we need to apply a hardware + * specific quirk if the device is known or we have a broken configuration. + */ + if (vdev->msix->pba_offset >= + vdev->bars[vdev->msix->pba_bar].region.size) { + + PCIDevice *pdev = &vdev->pdev; + uint16_t vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); + uint16_t device = pci_get_word(pdev->config + PCI_DEVICE_ID); + + /* + * Chelsio T5 Virtual Function devices are encoded as 0x58xx for T5 + * adapters. The T5 hardware returns an incorrect value of 0x8000 for + * the VF PBA offset while the BAR itself is only 8k. The correct value + * is 0x1000, so we hard code that here. + */ + if (vendor == PCI_VENDOR_ID_CHELSIO && (device & 0xff00) == 0x5800) { + vdev->msix->pba_offset = 0x1000; + } else { + error_report("vfio: Hardware reports invalid configuration, " + "MSIX PBA outside of specified BAR"); + return -EINVAL; + } + } + trace_vfio_early_setup_msix(vdev->vbasedev.name, pos, vdev->msix->table_bar, vdev->msix->table_offset, @@ -2388,7 +2415,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) * potentially insert a direct-mapped subregion before and after it. */ if (vdev->msix && vdev->msix->table_bar == nr) { - size = vdev->msix->table_offset & qemu_host_page_mask; + size = vdev->msix->table_offset & qemu_real_host_page_mask; } strncat(name, " mmap", sizeof(name) - strlen(name) - 1); @@ -2401,8 +2428,9 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) if (vdev->msix && vdev->msix->table_bar == nr) { uint64_t start; - start = HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + - (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); + start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + + (vdev->msix->entries * + PCI_MSIX_ENTRY_SIZE)); size = start < bar->region.size ? bar->region.size - start : 0; strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 5c678b914e..60365d1279 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -26,6 +26,7 @@ #include "hw/sysbus.h" #include "trace.h" #include "hw/platform-bus.h" +#include "sysemu/kvm.h" /* * Functions used whatever the injection method @@ -51,6 +52,7 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, intp->pin = info.index; intp->flags = info.flags; intp->state = VFIO_IRQ_INACTIVE; + intp->kvm_accel = false; sysbus_init_irq(sbdev, &intp->qemuirq); @@ -61,6 +63,13 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, error_report("vfio: Error: trigger event_notifier_init failed "); return NULL; } + /* Get an eventfd for resample/unmask */ + ret = event_notifier_init(&intp->unmask, 0); + if (ret) { + g_free(intp); + error_report("vfio: Error: resamplefd event_notifier_init failed"); + return NULL; + } QLIST_INSERT_HEAD(&vdev->intp_list, intp, next); return intp; @@ -315,6 +324,94 @@ static int vfio_start_eventfd_injection(VFIOINTp *intp) return ret; } +/* + * Functions used for irqfd + */ + +/** + * vfio_set_resample_eventfd - sets the resamplefd for an IRQ + * @intp: the IRQ struct handle + * programs the VFIO driver to unmask this IRQ when the + * intp->unmask eventfd is triggered + */ +static int vfio_set_resample_eventfd(VFIOINTp *intp) +{ + VFIODevice *vbasedev = &intp->vdev->vbasedev; + struct vfio_irq_set *irq_set; + int argsz, ret; + int32_t *pfd; + + argsz = sizeof(*irq_set) + sizeof(*pfd); + irq_set = g_malloc0(argsz); + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = intp->pin; + irq_set->start = 0; + irq_set->count = 1; + pfd = (int32_t *)&irq_set->data; + *pfd = event_notifier_get_fd(&intp->unmask); + qemu_set_fd_handler(*pfd, NULL, NULL, NULL); + ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + g_free(irq_set); + if (ret < 0) { + error_report("vfio: Failed to set resample eventfd: %m"); + } + return ret; +} + +static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq) +{ + VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev); + VFIOINTp *intp; + + if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() || + !vdev->irqfd_allowed) { + return; + } + + QLIST_FOREACH(intp, &vdev->intp_list, next) { + if (intp->qemuirq == irq) { + break; + } + } + assert(intp); + + /* Get to a known interrupt state */ + qemu_set_fd_handler(event_notifier_get_fd(&intp->interrupt), + NULL, NULL, vdev); + + vfio_mask_single_irqindex(&vdev->vbasedev, intp->pin); + qemu_set_irq(intp->qemuirq, 0); + + if (kvm_irqchip_add_irqfd_notifier(kvm_state, &intp->interrupt, + &intp->unmask, irq) < 0) { + goto fail_irqfd; + } + + if (vfio_set_trigger_eventfd(intp, NULL) < 0) { + goto fail_vfio; + } + if (vfio_set_resample_eventfd(intp) < 0) { + goto fail_vfio; + } + + /* Let's resume injection with irqfd setup */ + vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin); + + intp->kvm_accel = true; + + trace_vfio_platform_start_irqfd_injection(intp->pin, + event_notifier_get_fd(&intp->interrupt), + event_notifier_get_fd(&intp->unmask)); + return; +fail_vfio: + kvm_irqchip_remove_irqfd_notifier(kvm_state, &intp->interrupt, irq); +fail_irqfd: + vfio_start_eventfd_injection(intp); + vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin); + return; +} + /* VFIO skeleton */ static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev) @@ -584,17 +681,20 @@ static Property vfio_platform_dev_properties[] = { DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true), DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, mmap_timeout, 1100), + DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true), DEFINE_PROP_END_OF_LIST(), }; static void vfio_platform_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass); dc->realize = vfio_platform_realize; dc->props = vfio_platform_dev_properties; dc->vmsd = &vfio_platform_vmstate; dc->desc = "VFIO-based platform device assignment"; + sbc->connect_irq_notifier = vfio_start_irqfd_injection; set_bit(DEVICE_CATEGORY_MISC, dc->categories); } diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c index 35891856ee..07fd69c69e 100644 --- a/hw/virtio/dataplane/vring.c +++ b/hw/virtio/dataplane/vring.c @@ -153,22 +153,20 @@ bool vring_should_notify(VirtIODevice *vdev, Vring *vring) return true; } - return vring_need_event(vring_used_event(&vring->vr), new, old); + return vring_need_event(virtio_tswap16(vdev, vring_used_event(&vring->vr)), + new, old); } -static int get_desc(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem, +static int get_desc(Vring *vring, VirtQueueElement *elem, struct vring_desc *desc) { unsigned *num; struct iovec *iov; hwaddr *addr; MemoryRegion *mr; - int is_write = virtio_tswap16(vdev, desc->flags) & VRING_DESC_F_WRITE; - uint32_t len = virtio_tswap32(vdev, desc->len); - uint64_t desc_addr = virtio_tswap64(vdev, desc->addr); - if (is_write) { + if (desc->flags & VRING_DESC_F_WRITE) { num = &elem->in_num; iov = &elem->in_sg[*num]; addr = &elem->in_addr[*num]; @@ -192,17 +190,18 @@ static int get_desc(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem, } /* TODO handle non-contiguous memory across region boundaries */ - iov->iov_base = vring_map(&mr, desc_addr, len, is_write); + iov->iov_base = vring_map(&mr, desc->addr, desc->len, + desc->flags & VRING_DESC_F_WRITE); if (!iov->iov_base) { error_report("Failed to map descriptor addr %#" PRIx64 " len %u", - (uint64_t)desc_addr, len); + (uint64_t)desc->addr, desc->len); return -EFAULT; } /* The MemoryRegion is looked up again and unref'ed later, leave the * ref in place. */ - iov->iov_len = len; - *addr = desc_addr; + iov->iov_len = desc->len; + *addr = desc->addr; *num += 1; return 0; } @@ -224,23 +223,21 @@ static int get_indirect(VirtIODevice *vdev, Vring *vring, struct vring_desc desc; unsigned int i = 0, count, found = 0; int ret; - uint32_t len = virtio_tswap32(vdev, indirect->len); - uint64_t addr = virtio_tswap64(vdev, indirect->addr); /* Sanity check */ - if (unlikely(len % sizeof(desc))) { + if (unlikely(indirect->len % sizeof(desc))) { error_report("Invalid length in indirect descriptor: " "len %#x not multiple of %#zx", - len, sizeof(desc)); + indirect->len, sizeof(desc)); vring->broken = true; return -EFAULT; } - count = len / sizeof(desc); + count = indirect->len / sizeof(desc); /* Buffers are chained via a 16 bit next field, so * we can have at most 2^16 of these. */ if (unlikely(count > USHRT_MAX + 1)) { - error_report("Indirect buffer length too big: %d", len); + error_report("Indirect buffer length too big: %d", indirect->len); vring->broken = true; return -EFAULT; } @@ -251,12 +248,12 @@ static int get_indirect(VirtIODevice *vdev, Vring *vring, /* Translate indirect descriptor */ desc_ptr = vring_map(&mr, - addr + found * sizeof(desc), + indirect->addr + found * sizeof(desc), sizeof(desc), false); if (!desc_ptr) { error_report("Failed to map indirect descriptor " "addr %#" PRIx64 " len %zu", - (uint64_t)addr + found * sizeof(desc), + (uint64_t)indirect->addr + found * sizeof(desc), sizeof(desc)); vring->broken = true; return -EFAULT; @@ -274,20 +271,19 @@ static int get_indirect(VirtIODevice *vdev, Vring *vring, return -EFAULT; } - if (unlikely(virtio_tswap16(vdev, desc.flags) - & VRING_DESC_F_INDIRECT)) { + if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) { error_report("Nested indirect descriptor"); vring->broken = true; return -EFAULT; } - ret = get_desc(vdev, vring, elem, &desc); + ret = get_desc(vring, elem, &desc); if (ret < 0) { vring->broken |= (ret == -EFAULT); return ret; } - i = virtio_tswap16(vdev, desc.next); - } while (virtio_tswap16(vdev, desc.flags) & VRING_DESC_F_NEXT); + i = desc.next; + } while (desc.flags & VRING_DESC_F_NEXT); return 0; } @@ -388,7 +384,7 @@ int vring_pop(VirtIODevice *vdev, Vring *vring, /* Ensure descriptor is loaded before accessing fields */ barrier(); - if (virtio_tswap16(vdev, desc.flags) & VRING_DESC_F_INDIRECT) { + if (desc.flags & VRING_DESC_F_INDIRECT) { ret = get_indirect(vdev, vring, elem, &desc); if (ret < 0) { goto out; @@ -396,18 +392,19 @@ int vring_pop(VirtIODevice *vdev, Vring *vring, continue; } - ret = get_desc(vdev, vring, elem, &desc); + ret = get_desc(vring, elem, &desc); if (ret < 0) { goto out; } - i = virtio_tswap16(vdev, desc.next); - } while (virtio_tswap16(vdev, desc.flags) & VRING_DESC_F_NEXT); + i = desc.next; + } while (desc.flags & VRING_DESC_F_NEXT); /* On success, increment avail index. */ vring->last_avail_idx++; if (virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { - vring_avail_event(&vring->vr) = vring->last_avail_idx; + vring_avail_event(&vring->vr) = + virtio_tswap16(vdev, vring->last_avail_idx); } return head; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 6a0174e9cc..ccca2b6f3b 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -443,11 +443,89 @@ static const MemoryRegionOps virtio_pci_config_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; +/* Below are generic functions to do memcpy from/to an address space, + * without byteswaps, with input validation. + * + * As regular address_space_* APIs all do some kind of byteswap at least for + * some host/target combinations, we are forced to explicitly convert to a + * known-endianness integer value. + * It doesn't really matter which endian format to go through, so the code + * below selects the endian that causes the least amount of work on the given + * host. + * + * Note: host pointer must be aligned. + */ +static +void virtio_address_space_write(AddressSpace *as, hwaddr addr, + const uint8_t *buf, int len) +{ + uint32_t val; + + /* address_space_* APIs assume an aligned address. + * As address is under guest control, handle illegal values. + */ + addr &= ~(len - 1); + + /* Make sure caller aligned buf properly */ + assert(!(((uintptr_t)buf) & (len - 1))); + + switch (len) { + case 1: + val = pci_get_byte(buf); + address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); + break; + case 2: + val = pci_get_word(buf); + address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); + break; + case 4: + val = pci_get_long(buf); + address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); + break; + default: + /* As length is under guest control, handle illegal values. */ + break; + } +} + +static void +virtio_address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len) +{ + uint32_t val; + + /* address_space_* APIs assume an aligned address. + * As address is under guest control, handle illegal values. + */ + addr &= ~(len - 1); + + /* Make sure caller aligned buf properly */ + assert(!(((uintptr_t)buf) & (len - 1))); + + switch (len) { + case 1: + val = address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); + pci_set_byte(buf, val); + break; + case 2: + val = address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); + pci_set_word(buf, val); + break; + case 4: + val = address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); + pci_set_long(buf, val); + break; + default: + /* As length is under guest control, handle illegal values. */ + break; + } +} + static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t val, int len) { VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev); VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + struct virtio_pci_cfg_cap *cfg; pci_default_write_config(pci_dev, address, val, len); @@ -456,6 +534,49 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, virtio_pci_stop_ioeventfd(proxy); virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK); } + + if (proxy->config_cap && + ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap, + pci_cfg_data), + sizeof cfg->pci_cfg_data)) { + uint32_t off; + uint32_t len; + + cfg = (void *)(proxy->pci_dev.config + proxy->config_cap); + off = le32_to_cpu(cfg->cap.offset); + len = le32_to_cpu(cfg->cap.length); + + if (len <= sizeof cfg->pci_cfg_data) { + virtio_address_space_write(&proxy->modern_as, off, + cfg->pci_cfg_data, len); + } + } +} + +static uint32_t virtio_read_config(PCIDevice *pci_dev, + uint32_t address, int len) +{ + VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev); + struct virtio_pci_cfg_cap *cfg; + + if (proxy->config_cap && + ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap, + pci_cfg_data), + sizeof cfg->pci_cfg_data)) { + uint32_t off; + uint32_t len; + + cfg = (void *)(proxy->pci_dev.config + proxy->config_cap); + off = le32_to_cpu(cfg->cap.offset); + len = le32_to_cpu(cfg->cap.length); + + if (len <= sizeof cfg->pci_cfg_data) { + virtio_address_space_read(&proxy->modern_as, off, + cfg->pci_cfg_data, len); + } + } + + return pci_default_read_config(pci_dev, address, len); } static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, @@ -495,7 +616,7 @@ static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, VirtQueue *vq = virtio_get_queue(vdev, queue_no); EventNotifier *n = virtio_queue_get_guest_notifier(vq); int ret; - ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, NULL, irqfd->virq); + ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); return ret; } @@ -509,7 +630,7 @@ static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; int ret; - ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq); + ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); assert(ret == 0); } @@ -942,7 +1063,7 @@ static int virtio_pci_query_nvectors(DeviceState *d) return proxy->nvectors; } -static void virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, +static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, struct virtio_pci_cap *cap) { PCIDevice *dev = &proxy->pci_dev; @@ -954,6 +1075,8 @@ static void virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, assert(cap->cap_len >= sizeof *cap); memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len, cap->cap_len - PCI_CAP_FLAGS); + + return offset; } static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, @@ -1329,6 +1452,11 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) .notify_off_multiplier = cpu_to_le32(QEMU_VIRTIO_PCI_QUEUE_MEM_MULT), }; + struct virtio_pci_cfg_cap cfg = { + .cap.cap_len = sizeof cfg, + .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG, + }; + struct virtio_pci_cfg_cap *cfg_mask; /* TODO: add io access for speed */ @@ -1338,11 +1466,19 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) virtio_pci_modern_region_map(proxy, &proxy->isr, &cap); virtio_pci_modern_region_map(proxy, &proxy->device, &cap); virtio_pci_modern_region_map(proxy, &proxy->notify, ¬ify.cap); + pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_PREFETCH | PCI_BASE_ADDRESS_MEM_TYPE_64, &proxy->modern_bar); + + proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap); + cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap); + pci_set_byte(&cfg_mask->cap.bar, ~0x0); + pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0); + pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0); + pci_set_long(cfg_mask->pci_cfg_data, ~0x0); } if (proxy->nvectors && @@ -1354,6 +1490,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) } proxy->pci_dev.config_write = virtio_write_config; + proxy->pci_dev.config_read = virtio_read_config; if (legacy) { size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) @@ -1424,6 +1561,15 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) 2 * QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * VIRTIO_QUEUE_MAX); + memory_region_init_alias(&proxy->modern_cfg, + OBJECT(proxy), + "virtio-pci-cfg", + &proxy->modern_bar, + 0, + memory_region_size(&proxy->modern_bar)); + + address_space_init(&proxy->modern_as, &proxy->modern_cfg, "virtio-pci-cfg-as"); + virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy); if (k->realize) { k->realize(proxy, errp); @@ -1432,7 +1578,10 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) static void virtio_pci_exit(PCIDevice *pci_dev) { + VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev); + msix_uninit_exclusive_bar(pci_dev); + address_space_destroy(&proxy->modern_as); } static void virtio_pci_reset(DeviceState *qdev) diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 05d9d243f6..b6c442f522 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -112,9 +112,12 @@ struct VirtIOPCIProxy { VirtIOPCIRegion device; VirtIOPCIRegion notify; MemoryRegion modern_bar; + MemoryRegion modern_cfg; + AddressSpace modern_as; uint32_t legacy_io_bar; uint32_t msix_bar; uint32_t modern_mem_bar; + int config_cap; uint32_t flags; uint32_t class_code; uint32_t nvectors; |