diff options
128 files changed, 4511 insertions, 1526 deletions
@@ -1271,7 +1271,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) QemuOpts *opts = NULL; QDict *snapshot_options; BlockDriverState *bs_snapshot; - Error *local_err; + Error *local_err = NULL; int ret; /* if snapshot, we create a temporary backing file and open it @@ -1841,9 +1841,9 @@ void bdrv_close(BlockDriverState *bs) if (bs->job) { block_job_cancel_sync(bs->job); } - bdrv_drain_all(); /* complete I/O */ + bdrv_drain(bs); /* complete I/O */ bdrv_flush(bs); - bdrv_drain_all(); /* in case flush left pending I/O */ + bdrv_drain(bs); /* in case flush left pending I/O */ notifier_list_notify(&bs->close_notifiers, bs); if (bs->drv) { @@ -3906,7 +3906,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs, void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) { - bdrv_drain_all(); /* ensure there are no in-flight requests */ + bdrv_drain(bs); /* ensure there are no in-flight requests */ bdrv_detach_aio_context(bs); diff --git a/block/io.c b/block/io.c index 305e0d952e..d4bc83b33b 100644 --- a/block/io.c +++ b/block/io.c @@ -236,12 +236,12 @@ static bool bdrv_requests_pending(BlockDriverState *bs) /* * Wait for pending requests to complete on a single BlockDriverState subtree * - * See the warning in bdrv_drain_all(). This function can only be called if - * you are sure nothing can generate I/O because you have op blockers - * installed. - * * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState * AioContext. + * + * Only this BlockDriverState's AioContext is run, so in-flight requests must + * not depend on events in other AioContexts. In that case, use + * bdrv_drain_all() instead. */ void bdrv_drain(BlockDriverState *bs) { @@ -260,12 +260,6 @@ void bdrv_drain(BlockDriverState *bs) * * This function does not flush data to disk, use bdrv_flush_all() for that * after calling this function. - * - * Note that completion of an asynchronous I/O operation can trigger any - * number of other I/O operations on other devices---for example a coroutine - * can be arbitrarily complex and a constant flow of I/O can come until the - * coroutine is complete. Because of this, it is not possible to have a - * function to drain a single device's I/O queue. */ void bdrv_drain_all(void) { @@ -288,6 +282,12 @@ void bdrv_drain_all(void) } } + /* Note that completion of an asynchronous I/O operation can trigger any + * number of other I/O operations on other devices---for example a + * coroutine can submit an I/O request to another device in response to + * request completion. Therefore we must keep looping until there was no + * more activity rather than simply draining each device independently. + */ while (busy) { busy = false; diff --git a/block/mirror.c b/block/mirror.c index 8888cea952..d409337c4a 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -708,6 +708,8 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); if (!s->dirty_bitmap) { + g_free(s->replaces); + block_job_release(bs); return; } bdrv_set_enable_write_cache(s->target, true); diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index ed92a098c4..53b8afc3d3 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -281,9 +281,6 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, i = min_lru_index; trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), c == s->l2_table_cache, i); - if (i < 0) { - return i; - } ret = qcow2_cache_entry_flush(bs, c, i); if (ret < 0) { diff --git a/block/raw-posix.c b/block/raw-posix.c index cbe6574bf4..855febed52 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -2430,7 +2430,8 @@ static int floppy_probe_device(const char *filename) struct stat st; if (strstart(filename, "/dev/fd", NULL) && - !strstart(filename, "/dev/fdset/", NULL)) { + !strstart(filename, "/dev/fdset/", NULL) && + !strstart(filename, "/dev/fd/", NULL)) { prio = 50; } diff --git a/block/snapshot.c b/block/snapshot.c index 19395ae014..49e143e991 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -239,7 +239,7 @@ int bdrv_snapshot_delete(BlockDriverState *bs, } /* drain all pending i/o before deleting snapshot */ - bdrv_drain_all(); + bdrv_drain(bs); if (drv->bdrv_snapshot_delete) { return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); diff --git a/blockjob.c b/blockjob.c index ec46fad2f1..62bb906634 100644 --- a/blockjob.c +++ b/blockjob.c @@ -66,10 +66,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, block_job_set_speed(job, speed, &local_err); if (local_err) { - bs->job = NULL; - bdrv_op_unblock_all(bs, job->blocker); - error_free(job->blocker); - g_free(job); + block_job_release(bs); error_propagate(errp, local_err); return NULL; } @@ -77,18 +74,25 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, return job; } -void block_job_completed(BlockJob *job, int ret) +void block_job_release(BlockDriverState *bs) { - BlockDriverState *bs = job->bs; + BlockJob *job = bs->job; - assert(bs->job == job); - job->cb(job->opaque, ret); bs->job = NULL; bdrv_op_unblock_all(bs, job->blocker); error_free(job->blocker); g_free(job); } +void block_job_completed(BlockJob *job, int ret) +{ + BlockDriverState *bs = job->bs; + + assert(bs->job == job); + job->cb(job->opaque, ret); + block_job_release(bs); +} + void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) { Error *local_err = NULL; @@ -314,6 +314,7 @@ snappy="" bzip2="" guest_agent="" guest_agent_with_vss="no" +guest_agent_ntddscsi="no" guest_agent_msi="" vss_win32_sdk="" win_sdk="no" @@ -732,7 +733,7 @@ if test "$mingw32" = "yes" ; then sysconfdir="\${prefix}" local_statedir= confsuffix="" - libs_qga="-lws2_32 -lwinmm -lpowrprof $libs_qga" + libs_qga="-lws2_32 -lwinmm -lpowrprof -liphlpapi $libs_qga" fi werror="" @@ -3862,6 +3863,26 @@ if test "$mingw32" = "yes" -a "$guest_agent" != "no" -a "$guest_agent_with_vss" fi ########################################## +# check if mingw environment provides a recent ntddscsi.h +if test "$mingw32" = "yes" -a "$guest_agent" != "no"; then + cat > $TMPC << EOF +#include <windows.h> +#include <ntddscsi.h> +int main(void) { +#if !defined(IOCTL_SCSI_GET_ADDRESS) +#error Missing required ioctl definitions +#endif + SCSI_ADDRESS addr = { .Lun = 0, .TargetId = 0, .PathId = 0 }; + return addr.Lun; +} +EOF + if compile_prog "" "" ; then + guest_agent_ntddscsi=yes + libs_qga="-lsetupapi $libs_qga" + fi +fi + +########################################## # Guest agent Window MSI package if test "$guest_agent" != yes; then @@ -4534,6 +4555,7 @@ echo "libiscsi support $libiscsi" echo "libnfs support $libnfs" echo "build guest agent $guest_agent" echo "QGA VSS support $guest_agent_with_vss" +echo "QGA w32 disk info $guest_agent_ntddscsi" echo "seccomp support $seccomp" echo "coroutine backend $coroutine" echo "coroutine pool $coroutine_pool" @@ -4610,6 +4632,9 @@ if test "$mingw32" = "yes" ; then echo "CONFIG_QGA_VSS=y" >> $config_host_mak echo "WIN_SDK=\"$win_sdk\"" >> $config_host_mak fi + if test "$guest_agent_ntddscsi" = "yes" ; then + echo "CONFIG_QGA_NTDDDISK=y" >> $config_host_mak + fi if test "$guest_agent_msi" != "no"; then echo "QEMU_GA_MSI_ENABLED=yes" >> $config_host_mak echo "QEMU_GA_MSI_MINGW_DLL_PATH=${QEMU_GA_MSI_MINGW_DLL_PATH}" >> $config_host_mak diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 91d602c2e3..48b57623f7 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -16,6 +16,7 @@ CONFIG_PCKBD=y CONFIG_FDC=y CONFIG_ACPI=y CONFIG_ACPI_X86=y +CONFIG_ACPI_X86_ICH=y CONFIG_ACPI_MEMORY_HOTPLUG=y CONFIG_ACPI_CPU_HOTPLUG=y CONFIG_APM=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 62575ebcd8..4962ed70af 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -17,6 +17,7 @@ CONFIG_PCKBD=y CONFIG_FDC=y CONFIG_ACPI=y CONFIG_ACPI_X86=y +CONFIG_ACPI_X86_ICH=y CONFIG_ACPI_MEMORY_HOTPLUG=y CONFIG_ACPI_CPU_HOTPLUG=y CONFIG_APM=y diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt index 4c13d48726..d92cc4833b 100644 --- a/docs/qmp/qmp-events.txt +++ b/docs/qmp/qmp-events.txt @@ -473,6 +473,20 @@ Example: { "timestamp": {"seconds": 1290688046, "microseconds": 417172}, "event": "SPICE_MIGRATE_COMPLETED" } +MIGRATION +--------- + +Emitted when a migration event happens + +Data: None. + + - "status": migration status + See MigrationStatus in ~/qapi-schema.json for possible values + +Example: + +{"timestamp": {"seconds": 1432121972, "microseconds": 744001}, + "event": "MIGRATION", "data": {"status": "completed"}} STOP ---- diff --git a/docs/specs/ppc-spapr-hotplug.txt b/docs/specs/ppc-spapr-hotplug.txt index d35771cc2b..46e07196bb 100644 --- a/docs/specs/ppc-spapr-hotplug.txt +++ b/docs/specs/ppc-spapr-hotplug.txt @@ -284,4 +284,22 @@ struct rtas_event_log_v6_hp { } drc; } QEMU_PACKED; +== ibm,lrdr-capacity == + +ibm,lrdr-capacity is a property in the /rtas device tree node that identifies +the dynamic reconfiguration capabilities of the guest. It consists of a triple +consisting of <phys>, <size> and <maxcpus>. + + <phys>, encoded in BE format represents the maximum address in bytes and + hence the maximum memory that can be allocated to the guest. + + <size>, encoded in BE format represents the size increments in which + memory can be hot-plugged to the guest. + + <maxcpus>, a BE-encoded integer, represents the maximum number of + processors that the guest can have. + +pseries guests use this property to note the maximum allowed CPUs for the +guest. + [1] http://thread.gmane.org/gmane.linux.ports.ppc.embedded/75350/focus=106867 diff --git a/docs/specs/rocker.txt b/docs/specs/rocker.txt index 0af5c61585..1c743515c1 100644 --- a/docs/specs/rocker.txt +++ b/docs/specs/rocker.txt @@ -637,6 +637,7 @@ The TLVs for Rx descriptor buffer are: (1 << 5): TCP packet (1 << 6): UDP packet (1 << 7): TCP/UDP csum good + (1 << 8): Offload forward RX_CSUM 2 IP calculated checksum: IPv4: IP payload csum IPv6: header and payload csum @@ -645,6 +646,9 @@ The TLVs for Rx descriptor buffer are: RX_FRAG_MAX_LEN 2 Packet maximum fragment length RX_FRAG_LEN 2 Actual packet fragment length after receive +Offload forward RX_FLAG indicates the device has already forwarded the packet +so the host CPU should not also forward the packet. + Possible status return codes in descriptor on completion are: DESC_COMP_ERR reason @@ -1414,6 +1414,11 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) } } + new_ram_size = MAX(old_ram_size, + (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS); + if (new_ram_size > old_ram_size) { + migration_bitmap_extend(old_ram_size, new_ram_size); + } /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ, * QLIST (which has an RCU-friendly variant) does not have insertion at * tail, so save the last element in last_block. diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 29d46d8786..7d3230c2a5 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -1,4 +1,5 @@ -common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o ich9.o pcihp.o +common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o pcihp.o +common-obj-$(CONFIG_ACPI_X86_ICH) += ich9.o tco.o common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o common-obj-$(CONFIG_ACPI) += acpi_interface.o diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index f4dc7a84be..5fb7a879d6 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -30,6 +30,7 @@ #include "qemu/timer.h" #include "sysemu/sysemu.h" #include "hw/acpi/acpi.h" +#include "hw/acpi/tco.h" #include "sysemu/kvm.h" #include "exec/address-spaces.h" @@ -92,8 +93,16 @@ static void ich9_smi_writel(void *opaque, hwaddr addr, uint64_t val, unsigned width) { ICH9LPCPMRegs *pm = opaque; + TCOIORegs *tr = &pm->tco_regs; + uint64_t tco_en; + switch (addr) { case 0: + tco_en = pm->smi_en & ICH9_PMIO_SMI_EN_TCO_EN; + /* once TCO_LOCK bit is set, TCO_EN bit cannot be overwritten */ + if (tr->tco.cnt1 & TCO_LOCK) { + val = (val & ~ICH9_PMIO_SMI_EN_TCO_EN) | tco_en; + } pm->smi_en &= ~pm->smi_en_wmask; pm->smi_en |= (val & pm->smi_en_wmask); break; @@ -159,6 +168,25 @@ static const VMStateDescription vmstate_memhp_state = { } }; +static bool vmstate_test_use_tco(void *opaque) +{ + ICH9LPCPMRegs *s = opaque; + return s->enable_tco; +} + +static const VMStateDescription vmstate_tco_io_state = { + .name = "ich9_pm/tco", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .needed = vmstate_test_use_tco, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(tco_regs, ICH9LPCPMRegs, 1, vmstate_tco_io_sts, + TCOIORegs), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ich9_pm = { .name = "ich9_pm", .version_id = 1, @@ -179,6 +207,10 @@ const VMStateDescription vmstate_ich9_pm = { .subsections = (const VMStateDescription*[]) { &vmstate_memhp_state, NULL + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_tco_io_state, + NULL } }; @@ -209,7 +241,8 @@ static void pm_powerdown_req(Notifier *n, void *opaque) acpi_pm1_evt_power_down(&pm->acpi_regs); } -void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, bool smm_enabled, +void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, + bool smm_enabled, bool enable_tco, qemu_irq sci_irq) { memory_region_init(&pm->io, OBJECT(lpc_pci), "ich9-pm", ICH9_PMIO_SIZE); @@ -232,6 +265,12 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, bool smm_enabled, memory_region_add_subregion(&pm->io, ICH9_PMIO_SMI_EN, &pm->io_smi); pm->smm_enabled = smm_enabled; + + pm->enable_tco = enable_tco; + if (pm->enable_tco) { + acpi_pm_tco_init(&pm->tco_regs, &pm->io); + } + pm->irq = sci_irq; qemu_register_reset(pm_reset, pm); pm->powerdown_notifier.notify = pm_powerdown_req; @@ -352,6 +391,18 @@ out: error_propagate(errp, local_err); } +static bool ich9_pm_get_enable_tco(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + return s->pm.enable_tco; +} + +static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + s->pm.enable_tco = value; +} + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; @@ -383,6 +434,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) ich9_pm_get_s4_val, ich9_pm_set_s4_val, NULL, pm, NULL); + object_property_add_bool(obj, ACPI_PM_PROP_TCO_ENABLED, + ich9_pm_get_enable_tco, + ich9_pm_set_enable_tco, + NULL); } void ich9_pm_device_plug_cb(ICH9LPCPMRegs *pm, DeviceState *dev, Error **errp) diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c new file mode 100644 index 0000000000..7a026c255b --- /dev/null +++ b/hw/acpi/tco.c @@ -0,0 +1,264 @@ +/* + * QEMU ICH9 TCO emulation + * + * Copyright (c) 2015 Paulo Alcantara <pcacjr@zytor.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu-common.h" +#include "sysemu/watchdog.h" +#include "hw/i386/ich9.h" + +#include "hw/acpi/tco.h" + +//#define DEBUG + +#ifdef DEBUG +#define TCO_DEBUG(fmt, ...) \ + do { \ + fprintf(stderr, "%s "fmt, __func__, ## __VA_ARGS__); \ + } while (0) +#else +#define TCO_DEBUG(fmt, ...) do { } while (0) +#endif + +enum { + TCO_RLD_DEFAULT = 0x0000, + TCO_DAT_IN_DEFAULT = 0x00, + TCO_DAT_OUT_DEFAULT = 0x00, + TCO1_STS_DEFAULT = 0x0000, + TCO2_STS_DEFAULT = 0x0000, + TCO1_CNT_DEFAULT = 0x0000, + TCO2_CNT_DEFAULT = 0x0008, + TCO_MESSAGE1_DEFAULT = 0x00, + TCO_MESSAGE2_DEFAULT = 0x00, + TCO_WDCNT_DEFAULT = 0x00, + TCO_TMR_DEFAULT = 0x0004, + SW_IRQ_GEN_DEFAULT = 0x03, +}; + +static inline void tco_timer_reload(TCOIORegs *tr) +{ + tr->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + ((int64_t)(tr->tco.tmr & TCO_TMR_MASK) * TCO_TICK_NSEC); + timer_mod(tr->tco_timer, tr->expire_time); +} + +static inline void tco_timer_stop(TCOIORegs *tr) +{ + tr->expire_time = -1; +} + +static void tco_timer_expired(void *opaque) +{ + TCOIORegs *tr = opaque; + ICH9LPCPMRegs *pm = container_of(tr, ICH9LPCPMRegs, tco_regs); + ICH9LPCState *lpc = container_of(pm, ICH9LPCState, pm); + uint32_t gcs = pci_get_long(lpc->chip_config + ICH9_CC_GCS); + + tr->tco.rld = 0; + tr->tco.sts1 |= TCO_TIMEOUT; + if (++tr->timeouts_no == 2) { + tr->tco.sts2 |= TCO_SECOND_TO_STS; + tr->tco.sts2 |= TCO_BOOT_STS; + tr->timeouts_no = 0; + + if (!lpc->pin_strap.spkr_hi && !(gcs & ICH9_CC_GCS_NO_REBOOT)) { + watchdog_perform_action(); + tco_timer_stop(tr); + return; + } + } + + if (pm->smi_en & ICH9_PMIO_SMI_EN_TCO_EN) { + ich9_generate_smi(); + } else { + ich9_generate_nmi(); + } + tr->tco.rld = tr->tco.tmr; + tco_timer_reload(tr); +} + +/* NOTE: values of 0 or 1 will be ignored by ICH */ +static inline int can_start_tco_timer(TCOIORegs *tr) +{ + return !(tr->tco.cnt1 & TCO_TMR_HLT) && tr->tco.tmr > 1; +} + +static uint32_t tco_ioport_readw(TCOIORegs *tr, uint32_t addr) +{ + uint16_t rld; + + switch (addr) { + case TCO_RLD: + if (tr->expire_time != -1) { + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + int64_t elapsed = (tr->expire_time - now) / TCO_TICK_NSEC; + rld = (uint16_t)elapsed | (tr->tco.rld & ~TCO_RLD_MASK); + } else { + rld = tr->tco.rld; + } + return rld; + case TCO_DAT_IN: + return tr->tco.din; + case TCO_DAT_OUT: + return tr->tco.dout; + case TCO1_STS: + return tr->tco.sts1; + case TCO2_STS: + return tr->tco.sts2; + case TCO1_CNT: + return tr->tco.cnt1; + case TCO2_CNT: + return tr->tco.cnt2; + case TCO_MESSAGE1: + return tr->tco.msg1; + case TCO_MESSAGE2: + return tr->tco.msg2; + case TCO_WDCNT: + return tr->tco.wdcnt; + case TCO_TMR: + return tr->tco.tmr; + case SW_IRQ_GEN: + return tr->sw_irq_gen; + } + return 0; +} + +static void tco_ioport_writew(TCOIORegs *tr, uint32_t addr, uint32_t val) +{ + switch (addr) { + case TCO_RLD: + tr->timeouts_no = 0; + if (can_start_tco_timer(tr)) { + tr->tco.rld = tr->tco.tmr; + tco_timer_reload(tr); + } else { + tr->tco.rld = val; + } + break; + case TCO_DAT_IN: + tr->tco.din = val; + tr->tco.sts1 |= SW_TCO_SMI; + ich9_generate_smi(); + break; + case TCO_DAT_OUT: + tr->tco.dout = val; + tr->tco.sts1 |= TCO_INT_STS; + /* TODO: cause an interrupt, as selected by the TCO_INT_SEL bits */ + break; + case TCO1_STS: + tr->tco.sts1 = val & TCO1_STS_MASK; + break; + case TCO2_STS: + tr->tco.sts2 = val & TCO2_STS_MASK; + break; + case TCO1_CNT: + val &= TCO1_CNT_MASK; + /* + * once TCO_LOCK bit is set, it can not be cleared by software. a reset + * is required to change this bit from 1 to 0 -- it defaults to 0. + */ + tr->tco.cnt1 = val | (tr->tco.cnt1 & TCO_LOCK); + if (can_start_tco_timer(tr)) { + tr->tco.rld = tr->tco.tmr; + tco_timer_reload(tr); + } else { + tco_timer_stop(tr); + } + break; + case TCO2_CNT: + tr->tco.cnt2 = val; + break; + case TCO_MESSAGE1: + tr->tco.msg1 = val; + break; + case TCO_MESSAGE2: + tr->tco.msg2 = val; + break; + case TCO_WDCNT: + tr->tco.wdcnt = val; + break; + case TCO_TMR: + tr->tco.tmr = val; + break; + case SW_IRQ_GEN: + tr->sw_irq_gen = val; + break; + } +} + +static uint64_t tco_io_readw(void *opaque, hwaddr addr, unsigned width) +{ + TCOIORegs *tr = opaque; + return tco_ioport_readw(tr, addr); +} + +static void tco_io_writew(void *opaque, hwaddr addr, uint64_t val, + unsigned width) +{ + TCOIORegs *tr = opaque; + tco_ioport_writew(tr, addr, val); +} + +static const MemoryRegionOps tco_io_ops = { + .read = tco_io_readw, + .write = tco_io_writew, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .impl.min_access_size = 1, + .impl.max_access_size = 2, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +void acpi_pm_tco_init(TCOIORegs *tr, MemoryRegion *parent) +{ + *tr = (TCOIORegs) { + .tco = { + .rld = TCO_RLD_DEFAULT, + .din = TCO_DAT_IN_DEFAULT, + .dout = TCO_DAT_OUT_DEFAULT, + .sts1 = TCO1_STS_DEFAULT, + .sts2 = TCO2_STS_DEFAULT, + .cnt1 = TCO1_CNT_DEFAULT, + .cnt2 = TCO2_CNT_DEFAULT, + .msg1 = TCO_MESSAGE1_DEFAULT, + .msg2 = TCO_MESSAGE2_DEFAULT, + .wdcnt = TCO_WDCNT_DEFAULT, + .tmr = TCO_TMR_DEFAULT, + }, + .sw_irq_gen = SW_IRQ_GEN_DEFAULT, + .tco_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, tco_timer_expired, tr), + .expire_time = -1, + .timeouts_no = 0, + }; + memory_region_init_io(&tr->io, memory_region_owner(parent), + &tco_io_ops, tr, "sm-tco", ICH9_PMIO_TCO_LEN); + memory_region_add_subregion(parent, ICH9_PMIO_TCO_RLD, &tr->io); +} + +const VMStateDescription vmstate_tco_io_sts = { + .name = "tco io device status", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT16(tco.rld, TCOIORegs), + VMSTATE_UINT8(tco.din, TCOIORegs), + VMSTATE_UINT8(tco.dout, TCOIORegs), + VMSTATE_UINT16(tco.sts1, TCOIORegs), + VMSTATE_UINT16(tco.sts2, TCOIORegs), + VMSTATE_UINT16(tco.cnt1, TCOIORegs), + VMSTATE_UINT16(tco.cnt2, TCOIORegs), + VMSTATE_UINT8(tco.msg1, TCOIORegs), + VMSTATE_UINT8(tco.msg2, TCOIORegs), + VMSTATE_UINT8(tco.wdcnt, TCOIORegs), + VMSTATE_UINT16(tco.tmr, TCOIORegs), + VMSTATE_UINT8(sw_irq_gen, TCOIORegs), + VMSTATE_TIMER_PTR(tco_timer, TCOIORegs), + VMSTATE_INT64(expire_time, TCOIORegs), + VMSTATE_UINT8(timeouts_no, TCOIORegs), + VMSTATE_END_OF_LIST() + } +}; diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c index 4e464bd15a..36b328b9af 100644 --- a/hw/char/spapr_vty.c +++ b/hw/char/spapr_vty.c @@ -74,7 +74,7 @@ static void spapr_vty_realize(VIOsPAPRDevice *sdev, Error **errp) } /* Forward declaration */ -static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -101,7 +101,7 @@ static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_get_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_get_term_char(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -193,7 +193,7 @@ VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus) DeviceState *iter = kid->child; /* Only look at VTY devices */ - if (!object_dynamic_cast(OBJECT(iter), "spapr-vty")) { + if (!object_dynamic_cast(OBJECT(iter), TYPE_VIO_SPAPR_VTY_DEVICE)) { continue; } @@ -214,7 +214,7 @@ VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus) return selected; } -VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg) +VIOsPAPRDevice *vty_lookup(sPAPRMachineState *spapr, target_ulong reg) { VIOsPAPRDevice *sdev; @@ -228,6 +228,10 @@ VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg) return spapr_vty_get_default(spapr->vio_bus); } + if (!object_dynamic_cast(OBJECT(sdev), TYPE_VIO_SPAPR_VTY_DEVICE)) { + return NULL; + } + return sdev; } diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c index 278a2d1bdd..3c58629894 100644 --- a/hw/core/sysbus.c +++ b/hw/core/sysbus.c @@ -109,7 +109,13 @@ qemu_irq sysbus_get_connected_irq(SysBusDevice *dev, int n) void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq irq) { + SysBusDeviceClass *sbd = SYS_BUS_DEVICE_GET_CLASS(dev); + qdev_connect_gpio_out_named(DEVICE(dev), SYSBUS_DEVICE_GPIO_IRQ, n, irq); + + if (sbd->connect_irq_notifier) { + sbd->connect_irq_notifier(dev, irq); + } } /* Check whether an MMIO region exists */ diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c index f0f25c7bc9..5bc62cf344 100644 --- a/hw/display/virtio-gpu-pci.c +++ b/hw/display/virtio-gpu-pci.c @@ -17,7 +17,6 @@ #include "hw/virtio/virtio-gpu.h" static Property virtio_gpu_pci_properties[] = { - DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPUPCI, vdev.conf), DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy), DEFINE_PROP_END_OF_LIST(), }; @@ -25,13 +24,21 @@ static Property virtio_gpu_pci_properties[] = { static void virtio_gpu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VirtIOGPUPCI *vgpu = VIRTIO_GPU_PCI(vpci_dev); + VirtIOGPU *g = &vgpu->vdev; DeviceState *vdev = DEVICE(&vgpu->vdev); + int i; qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); /* force virtio-1.0 */ vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN; vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY; object_property_set_bool(OBJECT(vdev), true, "realized", errp); + + for (i = 0; i < g->conf.max_outputs; i++) { + object_property_set_link(OBJECT(g->scanout[i].con), + OBJECT(vpci_dev), + "device", errp); + } } static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data) @@ -49,8 +56,9 @@ static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data) static void virtio_gpu_initfn(Object *obj) { VirtIOGPUPCI *dev = VIRTIO_GPU_PCI(obj); - object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU); - object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_GPU); } static const TypeInfo virtio_gpu_pci_info = { diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 8c109b79f4..990a26b850 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -871,7 +871,7 @@ static void virtio_gpu_reset(VirtIODevice *vdev) } static Property virtio_gpu_properties[] = { - DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPU, conf), + DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index 94f9d0eb5a..f7e539fe93 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -79,6 +79,7 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp) VirtIOGPU *g = &vvga->vdev; VGACommonState *vga = &vvga->vga; uint32_t offset; + int i; /* init vga compat bits */ vga->vram_size_mb = 8; @@ -120,6 +121,12 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp) vga->con = g->scanout[0].con; graphic_console_set_hwops(vga->con, &virtio_vga_ops, vvga); + + for (i = 0; i < g->conf.max_outputs; i++) { + object_property_set_link(OBJECT(g->scanout[i].con), + OBJECT(vpci_dev), + "device", errp); + } } static void virtio_vga_reset(DeviceState *dev) @@ -131,7 +138,6 @@ static void virtio_vga_reset(DeviceState *dev) } static Property virtio_vga_properties[] = { - DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOVGA, vdev.conf), DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy), DEFINE_PROP_END_OF_LIST(), }; @@ -155,8 +161,9 @@ static void virtio_vga_class_init(ObjectClass *klass, void *data) static void virtio_vga_inst_initfn(Object *obj) { VirtIOVGA *dev = VIRTIO_VGA(obj); - object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU); - object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_GPU); } static TypeInfo virtio_vga_info = { diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7959b44b6b..7661ea9cdf 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -293,11 +293,82 @@ static void pc_boot_set(void *opaque, const char *boot_device, Error **errp) set_boot_dev(opaque, boot_device, errp); } +static void pc_cmos_init_floppy(ISADevice *rtc_state, ISADevice *floppy) +{ + int val, nb, i; + FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE }; + + /* floppy type */ + if (floppy) { + for (i = 0; i < 2; i++) { + fd_type[i] = isa_fdc_get_drive_type(floppy, i); + } + } + val = (cmos_get_fd_drive_type(fd_type[0]) << 4) | + cmos_get_fd_drive_type(fd_type[1]); + rtc_set_memory(rtc_state, 0x10, val); + + val = rtc_get_memory(rtc_state, REG_EQUIPMENT_BYTE); + nb = 0; + if (fd_type[0] < FDRIVE_DRV_NONE) { + nb++; + } + if (fd_type[1] < FDRIVE_DRV_NONE) { + nb++; + } + switch (nb) { + case 0: + break; + case 1: + val |= 0x01; /* 1 drive, ready for boot */ + break; + case 2: + val |= 0x41; /* 2 drives, ready for boot */ + break; + } + rtc_set_memory(rtc_state, REG_EQUIPMENT_BYTE, val); +} + typedef struct pc_cmos_init_late_arg { ISADevice *rtc_state; BusState *idebus[2]; } pc_cmos_init_late_arg; +typedef struct check_fdc_state { + ISADevice *floppy; + bool multiple; +} CheckFdcState; + +static int check_fdc(Object *obj, void *opaque) +{ + CheckFdcState *state = opaque; + Object *fdc; + uint32_t iobase; + Error *local_err = NULL; + + fdc = object_dynamic_cast(obj, TYPE_ISA_FDC); + if (!fdc) { + return 0; + } + + iobase = object_property_get_int(obj, "iobase", &local_err); + if (local_err || iobase != 0x3f0) { + error_free(local_err); + return 0; + } + + if (state->floppy) { + state->multiple = true; + } else { + state->floppy = ISA_DEVICE(obj); + } + return 0; +} + +static const char * const fdc_container_path[] = { + "/unattached", "/peripheral", "/peripheral-anon" +}; + static void pc_cmos_init_late(void *opaque) { pc_cmos_init_late_arg *arg = opaque; @@ -306,6 +377,8 @@ static void pc_cmos_init_late(void *opaque) int8_t heads, sectors; int val; int i, trans; + Object *container; + CheckFdcState state = { 0 }; val = 0; if (ide_get_geometry(arg->idebus[0], 0, @@ -335,16 +408,32 @@ static void pc_cmos_init_late(void *opaque) } rtc_set_memory(s, 0x39, val); + /* + * Locate the FDC at IO address 0x3f0, and configure the CMOS registers + * accordingly. + */ + for (i = 0; i < ARRAY_SIZE(fdc_container_path); i++) { + container = container_get(qdev_get_machine(), fdc_container_path[i]); + object_child_foreach(container, check_fdc, &state); + } + + if (state.multiple) { + error_report("warning: multiple floppy disk controllers with " + "iobase=0x3f0 have been found;\n" + "the one being picked for CMOS setup might not reflect " + "your intent"); + } + pc_cmos_init_floppy(s, state.floppy); + qemu_unregister_reset(pc_cmos_init_late, opaque); } void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, const char *boot_device, MachineState *machine, - ISADevice *floppy, BusState *idebus0, BusState *idebus1, + BusState *idebus0, BusState *idebus1, ISADevice *s) { - int val, nb, i; - FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE }; + int val; static pc_cmos_init_late_arg arg; PCMachineState *pc_machine = PC_MACHINE(machine); Error *local_err = NULL; @@ -401,39 +490,12 @@ void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, exit(1); } - /* floppy type */ - if (floppy) { - for (i = 0; i < 2; i++) { - fd_type[i] = isa_fdc_get_drive_type(floppy, i); - } - } - val = (cmos_get_fd_drive_type(fd_type[0]) << 4) | - cmos_get_fd_drive_type(fd_type[1]); - rtc_set_memory(s, 0x10, val); - val = 0; - nb = 0; - if (fd_type[0] < FDRIVE_DRV_NONE) { - nb++; - } - if (fd_type[1] < FDRIVE_DRV_NONE) { - nb++; - } - switch (nb) { - case 0: - break; - case 1: - val |= 0x01; /* 1 drive, ready for boot */ - break; - case 2: - val |= 0x41; /* 2 drives, ready for boot */ - break; - } val |= 0x02; /* FPU is there */ val |= 0x04; /* PS/2 mouse installed */ rtc_set_memory(s, REG_EQUIPMENT_BYTE, val); - /* hard drives */ + /* hard drives and FDC */ arg.rtc_state = s; arg.idebus[0] = idebus0; arg.idebus[1] = idebus1; @@ -1401,7 +1463,6 @@ static const MemoryRegionOps ioportF0_io_ops = { void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, ISADevice **rtc_state, bool create_fdctrl, - ISADevice **floppy, bool no_vmport, uint32 hpet_irqs) { @@ -1497,7 +1558,9 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, fd[i] = drive_get(IF_FLOPPY, 0, i); create_fdctrl |= !!fd[i]; } - *floppy = create_fdctrl ? fdctrl_init_isa(isa_bus, fd) : NULL; + if (create_fdctrl) { + fdctrl_init_isa(isa_bus, fd); + } } void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 56cdcb9661..8167b122f0 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -94,7 +94,6 @@ static void pc_init1(MachineState *machine) DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; BusState *idebus[MAX_IDE_BUS]; ISADevice *rtc_state; - ISADevice *floppy; MemoryRegion *ram_memory; MemoryRegion *pci_memory; MemoryRegion *rom_memory; @@ -241,7 +240,7 @@ static void pc_init1(MachineState *machine) } /* init basic PC hardware */ - pc_basic_device_init(isa_bus, gsi, &rtc_state, true, &floppy, + pc_basic_device_init(isa_bus, gsi, &rtc_state, true, (pc_machine->vmport != ON_OFF_AUTO_ON), 0x4); pc_nic_init(isa_bus, pci_bus); @@ -273,7 +272,7 @@ static void pc_init1(MachineState *machine) } pc_cmos_init(below_4g_mem_size, above_4g_mem_size, machine->boot_order, - machine, floppy, idebus[0], idebus[1], rtc_state); + machine, idebus[0], idebus[1], rtc_state); if (pci_enabled && usb_enabled()) { pci_create_simple(pci_bus, piix3_devfn + 2, "piix3-usb-uhci"); @@ -312,6 +311,8 @@ static void pc_compat_2_3(MachineState *machine) if (kvm_enabled()) { pcms->smm = ON_OFF_AUTO_OFF; } + global_state_set_optional(); + savevm_skip_configuration(); } static void pc_compat_2_2(MachineState *machine) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 8aa3a67fdf..974aead5a9 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -73,7 +73,6 @@ static void pc_q35_init(MachineState *machine) PCIDevice *lpc; BusState *idebus[MAX_SATA_PORTS]; ISADevice *rtc_state; - ISADevice *floppy; MemoryRegion *pci_memory; MemoryRegion *rom_memory; MemoryRegion *ram_memory; @@ -249,11 +248,11 @@ static void pc_q35_init(MachineState *machine) } /* init basic PC hardware */ - pc_basic_device_init(isa_bus, gsi, &rtc_state, !mc->no_floppy, &floppy, + pc_basic_device_init(isa_bus, gsi, &rtc_state, !mc->no_floppy, (pc_machine->vmport != ON_OFF_AUTO_ON), 0xff0104); /* connect pm stuff to lpc */ - ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pc_machine)); + ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pc_machine), !mc->no_tco); /* ahci and SATA device, for q35 1 ahci controller is built-in */ ahci = pci_create_simple_multifunction(host_bus, @@ -278,7 +277,7 @@ static void pc_q35_init(MachineState *machine) 8, NULL, 0); pc_cmos_init(below_4g_mem_size, above_4g_mem_size, machine->boot_order, - machine, floppy, idebus[0], idebus[1], rtc_state); + machine, idebus[0], idebus[1], rtc_state); /* the rest devices to which pci devfn is automatically assigned */ pc_vga_init(isa_bus, host_bus); @@ -295,6 +294,8 @@ static void pc_compat_2_3(MachineState *machine) if (kvm_enabled()) { pcms->smm = ON_OFF_AUTO_OFF; } + global_state_set_optional(); + savevm_skip_configuration(); } static void pc_compat_2_2(MachineState *machine) @@ -397,6 +398,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m) m->default_machine_opts = "firmware=bios-256k.bin"; m->default_display = "std"; m->no_floppy = 1; + m->no_tco = 0; m->alias = "q35"; } @@ -408,6 +410,7 @@ static void pc_q35_2_3_machine_options(MachineClass *m) { pc_q35_2_4_machine_options(m); m->no_floppy = 0; + m->no_tco = 1; m->alias = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_2_3); } diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h index 9f5b4d20b5..68d5074b33 100644 --- a/hw/ide/ahci.h +++ b/hw/ide/ahci.h @@ -166,7 +166,7 @@ #define AHCI_CMD_HDR_CMD_FIS_LEN 0x1f #define AHCI_CMD_HDR_PRDT_LEN 16 -#define SATA_SIGNATURE_CDROM 0xeb140000 +#define SATA_SIGNATURE_CDROM 0xeb140101 #define SATA_SIGNATURE_DISK 0x00000101 #define AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR 0x20 diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c index 2cb7d255d2..f56bff1afb 100644 --- a/hw/intc/arm_gic_kvm.c +++ b/hw/intc/arm_gic_kvm.c @@ -570,6 +570,12 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp) */ i += (GIC_INTERNAL * s->num_cpu); qdev_init_gpio_in(dev, kvm_arm_gic_set_irq, i); + + for (i = 0; i < s->num_irq - GIC_INTERNAL; i++) { + qemu_irq irq = qdev_get_gpio_in(dev, i); + kvm_irqchip_set_qemuirq_gsi(kvm_state, irq, i); + } + /* We never use our outbound IRQ/FIQ lines but provide them so that * we maintain the same interface as the non-KVM GIC. */ diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 0fd2a84c7b..924b1ae3c9 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -806,7 +806,7 @@ void xics_free(XICSState *icp, int irq, int num) * Guest interfaces */ -static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -816,7 +816,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_ipi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong server = get_cpu_index_by_dt_id(args[0]); @@ -830,7 +830,7 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_xirr(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -840,7 +840,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -852,7 +852,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_eoi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -862,7 +862,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -874,7 +874,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static void rtas_set_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -902,7 +902,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_get_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -927,7 +927,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority); } -static void rtas_int_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -953,7 +953,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_int_on(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index c15453f26f..d58729cfae 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -331,6 +331,15 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu) abort(); } + /* + * If we are reusing a parked vCPU fd corresponding to the CPU + * which was hot-removed earlier we don't have to renable + * KVM_CAP_IRQ_XICS capability again. + */ + if (ss->cap_irq_xics_enabled) { + return; + } + if (icpkvm->kernel_xics_fd != -1) { int ret; @@ -343,6 +352,7 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu) kvm_arch_vcpu_id(cs), strerror(errno)); exit(1); } + ss->cap_irq_xics_enabled = true; } } @@ -368,7 +378,7 @@ static void xics_kvm_set_nr_servers(XICSState *icp, uint32_t nr_servers, } } -static void rtas_dummy(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index bd655b8405..360699f6fd 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -138,6 +138,7 @@ static void ich9_cc_reset(ICH9LPCState *lpc) pci_set_long(c + ICH9_CC_D27IR, ICH9_CC_DIR_DEFAULT); pci_set_long(c + ICH9_CC_D26IR, ICH9_CC_DIR_DEFAULT); pci_set_long(c + ICH9_CC_D25IR, ICH9_CC_DIR_DEFAULT); + pci_set_long(c + ICH9_CC_GCS, ICH9_CC_GCS_DEFAULT); ich9_cc_update(lpc); } @@ -313,6 +314,16 @@ PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin) return route; } +void ich9_generate_smi(void) +{ + cpu_interrupt(first_cpu, CPU_INTERRUPT_SMI); +} + +void ich9_generate_nmi(void) +{ + cpu_interrupt(first_cpu, CPU_INTERRUPT_NMI); +} + static int ich9_lpc_sci_irq(ICH9LPCState *lpc) { switch (lpc->d.config[ICH9_LPC_ACPI_CTRL] & @@ -357,13 +368,13 @@ static void ich9_set_sci(void *opaque, int irq_num, int level) } } -void ich9_lpc_pm_init(PCIDevice *lpc_pci, bool smm_enabled) +void ich9_lpc_pm_init(PCIDevice *lpc_pci, bool smm_enabled, bool enable_tco) { ICH9LPCState *lpc = ICH9_LPC_DEVICE(lpc_pci); qemu_irq sci_irq; sci_irq = qemu_allocate_irq(ich9_set_sci, lpc, 0); - ich9_pm_init(lpc_pci, &lpc->pm, smm_enabled, sci_irq); + ich9_pm_init(lpc_pci, &lpc->pm, smm_enabled, enable_tco, sci_irq); ich9_lpc_reset(&lpc->d.qdev); } @@ -681,6 +692,11 @@ static const VMStateDescription vmstate_ich9_lpc = { } }; +static Property ich9_lpc_properties[] = { + DEFINE_PROP_BOOL("noreboot", ICH9LPCState, pin_strap.spkr_hi, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void ich9_lpc_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -692,6 +708,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) dc->reset = ich9_lpc_reset; k->init = ich9_lpc_init; dc->vmsd = &vmstate_ich9_lpc; + dc->props = ich9_lpc_properties; k->config_write = ich9_lpc_config_write; dc->desc = "ICH9 LPC bridge"; k->vendor_id = PCI_VENDOR_ID_INTEL; diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c index e9037b0c39..e3c0242d41 100644 --- a/hw/misc/macio/macio.c +++ b/hw/misc/macio/macio.c @@ -132,8 +132,6 @@ static void macio_common_realize(PCIDevice *d, Error **errp) SysBusDevice *sysbus_dev; Error *err = NULL; - d->config[0x3d] = 0x01; // interrupt on pin 1 - object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err); if (err) { error_propagate(errp, err); diff --git a/hw/net/e1000.c b/hw/net/e1000.c index bab8e2abfb..5c6bcd0014 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -185,6 +185,9 @@ e1000_link_up(E1000State *s) { s->mac_reg[STATUS] |= E1000_STATUS_LU; s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS; + + /* E1000_STATUS_LU is tested by e1000_can_receive() */ + qemu_flush_queued_packets(qemu_get_queue(s->nic)); } static bool diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index 4d25842509..47d080fd33 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -96,7 +96,7 @@ World *rocker_get_world(Rocker *r, enum rocker_world_type type) RockerSwitch *qmp_query_rocker(const char *name, Error **errp) { - RockerSwitch *rocker = g_malloc0(sizeof(*rocker)); + RockerSwitch *rocker; Rocker *r; r = rocker_find(name); @@ -106,6 +106,7 @@ RockerSwitch *qmp_query_rocker(const char *name, Error **errp) return NULL; } + rocker = g_new0(RockerSwitch, 1); rocker->name = g_strdup(r->name); rocker->id = r->switch_id; rocker->ports = r->fp_ports; @@ -192,11 +193,13 @@ static int tx_consume(Rocker *r, DescInfo *info) if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { return -ROCKER_EINVAL; } + break; case ROCKER_TX_OFFLOAD_TSO: if (!tlvs[ROCKER_TLV_TX_TSO_MSS] || !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) { return -ROCKER_EINVAL; } + break; } if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { @@ -600,7 +603,7 @@ static DescRing *rocker_get_rx_ring_by_pport(Rocker *r, } int rx_produce(World *world, uint32_t pport, - const struct iovec *iov, int iovcnt) + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu) { Rocker *r = world_rocker(world); PCIDevice *dev = (PCIDevice *)r; @@ -643,6 +646,10 @@ int rx_produce(World *world, uint32_t pport, goto out; } + if (copy_to_cpu) { + rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD; + } + /* XXX calc rx flags/csum */ tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */ diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h index b3310b61eb..f9c80f8013 100644 --- a/hw/net/rocker/rocker.h +++ b/hw/net/rocker/rocker.h @@ -77,7 +77,7 @@ int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up); int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr, uint16_t vlan_id); int rx_produce(World *world, uint32_t pport, - const struct iovec *iov, int iovcnt); + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu); int rocker_port_eg(Rocker *r, uint32_t pport, const struct iovec *iov, int iovcnt); diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c index d8d934c396..c693ae5081 100644 --- a/hw/net/rocker/rocker_fp.c +++ b/hw/net/rocker/rocker_fp.c @@ -125,18 +125,21 @@ int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt) return ROCKER_OK; } -static int fp_port_can_receive(NetClientState *nc) -{ - FpPort *port = qemu_get_nic_opaque(nc); - - return port->enabled; -} - static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) { FpPort *port = qemu_get_nic_opaque(nc); + /* If the port is disabled, we want to drop this pkt + * now rather than queing it for later. We don't want + * any stale pkts getting into the device when the port + * transitions to enabled. + */ + + if (!port->enabled) { + return -1; + } + return world_ingress(port->world, port->pport, iov, iovcnt); } @@ -165,7 +168,6 @@ static void fp_port_set_link_status(NetClientState *nc) static NetClientInfo fp_port_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), - .can_receive = fp_port_can_receive, .receive = fp_port_receive, .receive_iov = fp_port_receive_iov, .cleanup = fp_port_cleanup, diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h index fe639badd4..8c50830325 100644 --- a/hw/net/rocker/rocker_hw.h +++ b/hw/net/rocker/rocker_hw.h @@ -250,6 +250,7 @@ enum { #define ROCKER_RX_FLAGS_TCP (1 << 5) #define ROCKER_RX_FLAGS_UDP (1 << 6) #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7) +#define ROCKER_RX_FLAGS_FWD_OFFLOAD (1 << 8) /* Tx msg */ enum { diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index b25a17d6d7..874fb01d69 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -825,6 +825,8 @@ static OfDpaGroup *of_dpa_group_alloc(uint32_t id) static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, OfDpaGroup *group) { + uint8_t copy_to_cpu = fc->action_set.apply.copy_to_cpu; + if (group->l2_interface.pop_vlan) { of_dpa_flow_pkt_strip_vlan(fc); } @@ -837,7 +839,8 @@ static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, */ if (group->l2_interface.out_pport == 0) { - rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt); + rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt, + copy_to_cpu); } else if (group->l2_interface.out_pport != fc->in_pport) { rocker_port_eg(world_rocker(fc->of_dpa->world), group->l2_interface.out_pport, @@ -2525,7 +2528,6 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data) ngroup->has_set_vlan_id = true; ngroup->set_vlan_id = ntohs(group->l2_rewrite.vlan_id); } - break; if (memcmp(group->l2_rewrite.src_mac.a, zero_mac.a, ETH_ALEN)) { ngroup->has_set_eth_src = true; ngroup->set_eth_src = @@ -2536,6 +2538,7 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data) ngroup->set_eth_dst = qemu_mac_strdup_printf(group->l2_rewrite.dst_mac.a); } + break; case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: ngroup->has_vlan_id = true; diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c index b991e871d3..a6b18f1754 100644 --- a/hw/net/rocker/rocker_world.c +++ b/hw/net/rocker/rocker_world.c @@ -32,7 +32,7 @@ ssize_t world_ingress(World *world, uint32_t pport, return world->ops->ig(world, pport, iov, iovcnt); } - return iov_size(iov, iovcnt); + return -1; } int world_do_cmd(World *world, DescInfo *info, diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 2dd5ec1117..1ca5e9ce6a 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -284,7 +284,7 @@ static int check_bd(VIOsPAPRVLANDevice *dev, vlan_bd_t bd, } static target_ulong h_register_logical_lan(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { @@ -349,7 +349,8 @@ static target_ulong h_register_logical_lan(PowerPCCPU *cpu, } -static target_ulong h_free_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_free_logical_lan(PowerPCCPU *cpu, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -371,7 +372,7 @@ static target_ulong h_free_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, } static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { @@ -421,7 +422,8 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, return H_SUCCESS; } -static target_ulong h_send_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_send_logical_lan(PowerPCCPU *cpu, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -490,7 +492,7 @@ static target_ulong h_send_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 104a0f599b..706e0606a9 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -1879,6 +1879,12 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) return -1; } + if (s->peer_has_vhdr) { + vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); + buf += sizeof(struct virtio_net_hdr); + size -= sizeof(struct virtio_net_hdr); + } + /* Pad to minimum Ethernet frame length */ if (size < sizeof(min_buf)) { memcpy(min_buf, buf, size); @@ -1887,12 +1893,6 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) size = sizeof(min_buf); } - if (s->peer_has_vhdr) { - vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); - buf += sizeof(struct virtio_net_hdr); - size -= sizeof(struct virtio_net_hdr); - } - vmxnet_rx_pkt_set_packet_type(s->rx_pkt, get_eth_packet_type(PKT_GET_ETH_HDR(buf))); diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 11332d14ea..fcaa77dd9a 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -45,7 +45,7 @@ typedef struct sPAPRNVRAM { #define DEFAULT_NVRAM_SIZE 65536 #define MAX_NVRAM_SIZE 1048576 -static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -86,7 +86,7 @@ static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 1, len); } -static void rtas_nvram_store(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 14c77117f6..6e28985bd1 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -78,7 +78,7 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) PCI_EXP_LNK_LS_25); pci_set_word(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25); + PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25 |PCI_EXP_LNKSTA_DLLLA); pci_set_long(exp_cap + PCI_EXP_DEVCAP2, PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP); diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 0f3e34122a..77d5c819ef 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -145,7 +145,6 @@ static void ppc_core99_reset(void *opaque) static void ppc_core99_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -182,14 +181,15 @@ static void ppc_core99_init(MachineState *machine) linux_boot = (kernel_filename != NULL); /* init CPUs */ - if (cpu_model == NULL) + if (machine->cpu_model == NULL) { #ifdef TARGET_PPC64 - cpu_model = "970fx"; + machine->cpu_model = "970fx"; #else - cpu_model = "G4"; + machine->cpu_model = "G4"; #endif + } for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 99879dd2d5..06fdbaf588 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -75,7 +75,6 @@ static void ppc_heathrow_reset(void *opaque) static void ppc_heathrow_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -107,10 +106,10 @@ static void ppc_heathrow_init(MachineState *machine) linux_boot = (kernel_filename != NULL); /* init CPUs */ - if (cpu_model == NULL) - cpu_model = "G3"; + if (machine->cpu_model == NULL) + machine->cpu_model = "G3"; for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c index 778970aa9b..032fa803db 100644 --- a/hw/ppc/ppc440_bamboo.c +++ b/hw/ppc/ppc440_bamboo.c @@ -159,7 +159,6 @@ static void main_cpu_reset(void *opaque) static void bamboo_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -184,10 +183,10 @@ static void bamboo_init(MachineState *machine) int i; /* Setup CPU. */ - if (cpu_model == NULL) { - cpu_model = "440EP"; + if (machine->cpu_model == NULL) { + machine->cpu_model = "440EP"; } - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to initialize CPU!\n"); exit(1); diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index 998ee2d16b..45b5f62d6e 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -506,7 +506,6 @@ static int PPC_NVRAM_set_params (Nvram *nvram, uint16_t NVRAM_size, static void ppc_prep_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -536,10 +535,10 @@ static void ppc_prep_init(MachineState *machine) linux_boot = (kernel_filename != NULL); /* init CPUs */ - if (cpu_model == NULL) - cpu_model = "602"; + if (machine->cpu_model == NULL) + machine->cpu_model = "602"; for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index f174e5a0f3..a6f19473cf 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -34,6 +34,7 @@ #include "sysemu/cpus.h" #include "sysemu/kvm.h" #include "kvm_ppc.h" +#include "migration/migration.h" #include "mmu-hash64.h" #include "qom/cpu.h" @@ -90,25 +91,6 @@ #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) -typedef struct sPAPRMachineState sPAPRMachineState; - -#define TYPE_SPAPR_MACHINE "spapr-machine" -#define SPAPR_MACHINE(obj) \ - OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE) - -/** - * sPAPRMachineState: - */ -struct sPAPRMachineState { - /*< private >*/ - MachineState parent_obj; - - /*< public >*/ - char *kvm_type; -}; - -sPAPREnvironment *spapr; - static XICSState *try_create_xics(const char *type, int nr_servers, int nr_irqs, Error **errp) { @@ -184,7 +166,28 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, return ret; } -static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) +static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs) +{ + int ret = 0; + PowerPCCPU *cpu = POWERPC_CPU(cs); + int index = ppc_get_vcpu_dt_id(cpu); + uint32_t associativity[] = {cpu_to_be32(0x5), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(cs->numa_node), + cpu_to_be32(index)}; + + /* Advertise NUMA via ibm,associativity */ + if (nb_numa_nodes > 1) { + ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity, + sizeof(associativity)); + } + + return ret; +} + +static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr) { int ret = 0, offset, cpus_offset; CPUState *cs; @@ -196,12 +199,6 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) PowerPCCPU *cpu = POWERPC_CPU(cs); DeviceClass *dc = DEVICE_GET_CLASS(cs); int index = ppc_get_vcpu_dt_id(cpu); - uint32_t associativity[] = {cpu_to_be32(0x5), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(cs->numa_node), - cpu_to_be32(index)}; if ((index % smt) != 0) { continue; @@ -225,20 +222,17 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) } } - if (nb_numa_nodes > 1) { - ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity, - sizeof(associativity)); - if (ret < 0) { - return ret; - } - } - ret = fdt_setprop(fdt, offset, "ibm,pft-size", pft_size_prop, sizeof(pft_size_prop)); if (ret < 0) { return ret; } + ret = spapr_fixup_cpu_numa_dt(fdt, offset, cs); + if (ret < 0) { + return ret; + } + ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu, ppc_get_compat_smt_threads(cpu)); if (ret < 0) { @@ -284,15 +278,18 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop, static hwaddr spapr_node0_size(void) { + MachineState *machine = MACHINE(qdev_get_machine()); + if (nb_numa_nodes) { int i; for (i = 0; i < nb_numa_nodes; ++i) { if (numa_info[i].node_mem) { - return MIN(pow2floor(numa_info[i].node_mem), ram_size); + return MIN(pow2floor(numa_info[i].node_mem), + machine->ram_size); } } } - return ram_size; + return machine->ram_size; } #define _FDT(exp) \ @@ -318,18 +315,13 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, uint32_t epow_irq) { void *fdt; - CPUState *cs; uint32_t start_prop = cpu_to_be32(initrd_base); uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); GString *hypertas = g_string_sized_new(256); GString *qemu_hypertas = g_string_sized_new(256); uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; - uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)}; - int smt = kvmppc_smt_threads(); + uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)}; unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; - QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); - unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; - uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; char *buf; add_str(hypertas, "hcall-pft"); @@ -415,107 +407,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_end_node(fdt))); - /* cpus */ - _FDT((fdt_begin_node(fdt, "cpus"))); - - _FDT((fdt_property_cell(fdt, "#address-cells", 0x1))); - _FDT((fdt_property_cell(fdt, "#size-cells", 0x0))); - - CPU_FOREACH(cs) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - DeviceClass *dc = DEVICE_GET_CLASS(cs); - PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); - int index = ppc_get_vcpu_dt_id(cpu); - char *nodename; - uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), - 0xffffffff, 0xffffffff}; - uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; - uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; - uint32_t page_sizes_prop[64]; - size_t page_sizes_prop_size; - - if ((index % smt) != 0) { - continue; - } - - nodename = g_strdup_printf("%s@%x", dc->fw_name, index); - - _FDT((fdt_begin_node(fdt, nodename))); - - g_free(nodename); - - _FDT((fdt_property_cell(fdt, "reg", index))); - _FDT((fdt_property_string(fdt, "device_type", "cpu"))); - - _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR]))); - _FDT((fdt_property_cell(fdt, "d-cache-block-size", - env->dcache_line_size))); - _FDT((fdt_property_cell(fdt, "d-cache-line-size", - env->dcache_line_size))); - _FDT((fdt_property_cell(fdt, "i-cache-block-size", - env->icache_line_size))); - _FDT((fdt_property_cell(fdt, "i-cache-line-size", - env->icache_line_size))); - - if (pcc->l1_dcache_size) { - _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size))); - } else { - fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); - } - if (pcc->l1_icache_size) { - _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size))); - } else { - fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); - } - - _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq))); - _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq))); - _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr))); - _FDT((fdt_property_string(fdt, "status", "okay"))); - _FDT((fdt_property(fdt, "64-bit", NULL, 0))); - - if (env->spr_cb[SPR_PURR].oea_read) { - _FDT((fdt_property(fdt, "ibm,purr", NULL, 0))); - } - - if (env->mmu_model & POWERPC_MMU_1TSEG) { - _FDT((fdt_property(fdt, "ibm,processor-segment-sizes", - segs, sizeof(segs)))); - } - - /* Advertise VMX/VSX (vector extensions) if available - * 0 / no property == no vector extensions - * 1 == VMX / Altivec available - * 2 == VSX available */ - if (env->insns_flags & PPC_ALTIVEC) { - uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; - - _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx))); - } - - /* Advertise DFP (Decimal Floating Point) if available - * 0 / no property == no DFP - * 1 == DFP available */ - if (env->insns_flags2 & PPC2_DFP) { - _FDT((fdt_property_cell(fdt, "ibm,dfp", 1))); - } - - page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, - sizeof(page_sizes_prop)); - if (page_sizes_prop_size) { - _FDT((fdt_property(fdt, "ibm,segment-page-sizes", - page_sizes_prop, page_sizes_prop_size))); - } - - _FDT((fdt_property_cell(fdt, "ibm,chip-id", - cs->cpu_index / cpus_per_socket))); - - _FDT((fdt_end_node(fdt))); - } - - _FDT((fdt_end_node(fdt))); - /* RTAS */ _FDT((fdt_begin_node(fdt, "rtas"))); @@ -604,7 +495,8 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, return fdt; } -int spapr_h_cas_compose_response(target_ulong addr, target_ulong size) +int spapr_h_cas_compose_response(sPAPRMachineState *spapr, + target_ulong addr, target_ulong size) { void *fdt, *fdt_skel; sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; @@ -665,8 +557,9 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, sizeof(associativity)))); } -static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) +static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt) { + MachineState *machine = MACHINE(spapr); hwaddr mem_start, node_size; int i, nb_nodes = nb_numa_nodes; NodeInfo *nodes = numa_info; @@ -675,7 +568,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) /* No NUMA nodes, assume there is just one node with whole RAM */ if (!nb_numa_nodes) { nb_nodes = 1; - ramnode.node_mem = ram_size; + ramnode.node_mem = machine->ram_size; nodes = &ramnode; } @@ -683,12 +576,12 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) if (!nodes[i].node_mem) { continue; } - if (mem_start >= ram_size) { + if (mem_start >= machine->ram_size) { node_size = 0; } else { node_size = nodes[i].node_mem; - if (node_size > ram_size - mem_start) { - node_size = ram_size - mem_start; + if (node_size > machine->ram_size - mem_start) { + node_size = machine->ram_size - mem_start; } } if (!mem_start) { @@ -714,7 +607,138 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) return 0; } -static void spapr_finalize_fdt(sPAPREnvironment *spapr, +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, + sPAPRMachineState *spapr) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); + int index = ppc_get_vcpu_dt_id(cpu); + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), + 0xffffffff, 0xffffffff}; + uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; + uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; + uint32_t page_sizes_prop[64]; + size_t page_sizes_prop_size; + QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); + unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; + uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; + uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; + + _FDT((fdt_setprop_cell(fdt, offset, "reg", index))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); + + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", + env->icache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", + env->icache_line_size))); + + if (pcc->l1_dcache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", + pcc->l1_dcache_size))); + } else { + fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); + } + if (pcc->l1_icache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", + pcc->l1_icache_size))); + } else { + fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); + } + + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); + + if (env->spr_cb[SPR_PURR].oea_read) { + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); + } + + if (env->mmu_model & POWERPC_MMU_1TSEG) { + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", + segs, sizeof(segs)))); + } + + /* Advertise VMX/VSX (vector extensions) if available + * 0 / no property == no vector extensions + * 1 == VMX / Altivec available + * 2 == VSX available */ + if (env->insns_flags & PPC_ALTIVEC) { + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); + } + + /* Advertise DFP (Decimal Floating Point) if available + * 0 / no property == no DFP + * 1 == DFP available */ + if (env->insns_flags2 & PPC2_DFP) { + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); + } + + page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, + sizeof(page_sizes_prop)); + if (page_sizes_prop_size) { + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", + page_sizes_prop, page_sizes_prop_size))); + } + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", + cs->cpu_index / cpus_per_socket))); + + _FDT((fdt_setprop(fdt, offset, "ibm,pft-size", + pft_size_prop, sizeof(pft_size_prop)))); + + _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs)); + + _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, + ppc_get_compat_smt_threads(cpu))); +} + +static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr) +{ + CPUState *cs; + int cpus_offset; + char *nodename; + int smt = kvmppc_smt_threads(); + + cpus_offset = fdt_add_subnode(fdt, 0, "cpus"); + _FDT(cpus_offset); + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0))); + + /* + * We walk the CPUs in reverse order to ensure that CPU DT nodes + * created by fdt_add_subnode() end up in the right order in FDT + * for the guest kernel the enumerate the CPUs correctly. + */ + CPU_FOREACH_REVERSE(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + int index = ppc_get_vcpu_dt_id(cpu); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + int offset; + + if ((index % smt) != 0) { + continue; + } + + nodename = g_strdup_printf("%s@%x", dc->fw_name, index); + offset = fdt_add_subnode(fdt, cpus_offset, nodename); + g_free(nodename); + _FDT(offset); + spapr_populate_cpu_dt(cs, fdt, offset, spapr); + } + +} + +static void spapr_finalize_fdt(sPAPRMachineState *spapr, hwaddr fdt_addr, hwaddr rtas_addr, hwaddr rtas_size) @@ -759,11 +783,8 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, fprintf(stderr, "Couldn't set up RTAS device tree properties\n"); } - /* Advertise NUMA via ibm,associativity */ - ret = spapr_fixup_cpu_dt(fdt, spapr); - if (ret < 0) { - fprintf(stderr, "Couldn't finalize CPU device tree properties\n"); - } + /* cpus */ + spapr_populate_cpus_dt_node(fdt, spapr); bootlist = get_boot_devices_list(&cb, true); if (cb && bootlist) { @@ -830,7 +851,7 @@ static void emulate_spapr_hypercall(PowerPCCPU *cpu) #define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY)) #define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY)) -static void spapr_reset_htab(sPAPREnvironment *spapr) +static void spapr_reset_htab(sPAPRMachineState *spapr) { long shift; int index; @@ -892,7 +913,7 @@ static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque) * A guest reset will cause spapr->htab_fd to become stale if being used. * Reopen the file descriptor to make sure the whole HTAB is properly read. */ -static int spapr_check_htab_fd(sPAPREnvironment *spapr) +static int spapr_check_htab_fd(sPAPRMachineState *spapr) { int rc = 0; @@ -912,6 +933,7 @@ static int spapr_check_htab_fd(sPAPREnvironment *spapr) static void ppc_spapr_reset(void) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); PowerPCCPU *first_ppc_cpu; uint32_t rtas_limit; @@ -945,12 +967,13 @@ static void ppc_spapr_reset(void) first_ppc_cpu->env.gpr[3] = spapr->fdt_addr; first_ppc_cpu->env.gpr[5] = 0; first_cpu->halted = 0; - first_ppc_cpu->env.nip = spapr->entry_point; + first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT; } static void spapr_cpu_reset(void *opaque) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); PowerPCCPU *cpu = opaque; CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; @@ -979,12 +1002,12 @@ static void spapr_cpu_reset(void *opaque) * We have 8 hpte per group, and each hpte is 16 bytes. * ie have 128 bytes per hpte entry. */ - env->htab_mask = (1ULL << ((spapr)->htab_shift - 7)) - 1; + env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1; env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab | (spapr->htab_shift - 18); } -static void spapr_create_nvram(sPAPREnvironment *spapr) +static void spapr_create_nvram(sPAPRMachineState *spapr) { DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram"); DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0); @@ -998,7 +1021,7 @@ static void spapr_create_nvram(sPAPREnvironment *spapr) spapr->nvram = (struct sPAPRNVRAM *)dev; } -static void spapr_rtc_create(sPAPREnvironment *spapr) +static void spapr_rtc_create(sPAPRMachineState *spapr) { DeviceState *dev = qdev_create(NULL, TYPE_SPAPR_RTC); @@ -1028,7 +1051,7 @@ static int spapr_vga_init(PCIBus *pci_bus) static int spapr_post_load(void *opaque, int version_id) { - sPAPREnvironment *spapr = (sPAPREnvironment *)opaque; + sPAPRMachineState *spapr = (sPAPRMachineState *)opaque; int err = 0; /* In earlier versions, there was no separate qdev for the PAPR @@ -1057,16 +1080,16 @@ static const VMStateDescription vmstate_spapr = { VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4), /* RTC offset */ - VMSTATE_UINT64_TEST(rtc_offset, sPAPREnvironment, version_before_3), + VMSTATE_UINT64_TEST(rtc_offset, sPAPRMachineState, version_before_3), - VMSTATE_PPC_TIMEBASE_V(tb, sPAPREnvironment, 2), + VMSTATE_PPC_TIMEBASE_V(tb, sPAPRMachineState, 2), VMSTATE_END_OF_LIST() }, }; static int htab_save_setup(QEMUFile *f, void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; /* "Iteration" header */ qemu_put_be32(f, spapr->htab_shift); @@ -1090,7 +1113,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque) return 0; } -static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr, +static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr, int64_t max_ns) { int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; @@ -1140,7 +1163,7 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr, spapr->htab_save_index = index; } -static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr, +static int htab_save_later_pass(QEMUFile *f, sPAPRMachineState *spapr, int64_t max_ns) { bool final = max_ns < 0; @@ -1222,7 +1245,7 @@ static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr, static int htab_save_iterate(QEMUFile *f, void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; int rc = 0; /* Iteration header */ @@ -1257,7 +1280,7 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) static int htab_save_complete(QEMUFile *f, void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; /* Iteration header */ qemu_put_be32(f, 0); @@ -1292,7 +1315,7 @@ static int htab_save_complete(QEMUFile *f, void *opaque) static int htab_load(QEMUFile *f, void *opaque, int version_id) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; uint32_t section_hdr; int fd = -1; @@ -1386,16 +1409,42 @@ static void spapr_boot_set(void *opaque, const char *boot_device, machine->boot_order = g_strdup(boot_device); } +static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, TIMEBASE_FREQ); + + /* PAPR always has exception vectors in RAM not ROM. To ensure this, + * MSR[IP] should never be set. + */ + env->msr_mask &= ~(1 << 6); + + /* Tell KVM that we're in PAPR mode */ + if (kvm_enabled()) { + kvmppc_set_papr(cpu); + } + + if (cpu->max_compat) { + if (ppc_set_compat(cpu, cpu->max_compat) < 0) { + exit(1); + } + } + + xics_cpu_setup(spapr->icp, cpu); + + qemu_register_reset(spapr_cpu_reset, cpu); +} + /* pSeries LPAR / sPAPR hardware init */ static void ppc_spapr_init(MachineState *machine) { - ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; + sPAPRMachineState *spapr = SPAPR_MACHINE(machine); const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; PowerPCCPU *cpu; - CPUPPCState *env; PCIHostState *phb; int i; MemoryRegion *sysmem = get_system_memory(); @@ -1412,7 +1461,6 @@ static void ppc_spapr_init(MachineState *machine) msi_supported = true; - spapr = g_malloc0(sizeof(*spapr)); QLIST_INIT(&spapr->phbs); cpu_ppc_hypercall = emulate_spapr_hypercall; @@ -1459,7 +1507,7 @@ static void ppc_spapr_init(MachineState *machine) * more than needed for the Linux guests we support. */ spapr->htab_shift = 18; /* Minimum architected size */ while (spapr->htab_shift <= 46) { - if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) { + if ((1ULL << (spapr->htab_shift + 7)) >= machine->ram_size) { break; } spapr->htab_shift++; @@ -1467,43 +1515,21 @@ static void ppc_spapr_init(MachineState *machine) /* Set up Interrupt Controller before we create the VCPUs */ spapr->icp = xics_system_init(machine, - smp_cpus * kvmppc_smt_threads() / smp_threads, + DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(), + smp_threads), XICS_IRQS); /* init CPUs */ - if (cpu_model == NULL) { - cpu_model = kvm_enabled() ? "host" : "POWER7"; + if (machine->cpu_model == NULL) { + machine->cpu_model = kvm_enabled() ? "host" : "POWER7"; } for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); } - env = &cpu->env; - - /* Set time-base frequency to 512 MHz */ - cpu_ppc_tb_init(env, TIMEBASE_FREQ); - - /* PAPR always has exception vectors in RAM not ROM. To ensure this, - * MSR[IP] should never be set. - */ - env->msr_mask &= ~(1 << 6); - - /* Tell KVM that we're in PAPR mode */ - if (kvm_enabled()) { - kvmppc_set_papr(cpu); - } - - if (cpu->max_compat) { - if (ppc_set_compat(cpu, cpu->max_compat) < 0) { - exit(1); - } - } - - xics_cpu_setup(spapr->icp, cpu); - - qemu_register_reset(spapr_cpu_reset, cpu); + spapr_cpu_init(spapr, cpu); } if (kvm_enabled()) { @@ -1512,9 +1538,8 @@ static void ppc_spapr_init(MachineState *machine) } /* allocate RAM */ - spapr->ram_limit = ram_size; memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram", - spapr->ram_limit); + machine->ram_size); memory_region_add_subregion(sysmem, 0, ram); if (rma_alloc_size && rma) { @@ -1658,8 +1683,9 @@ static void ppc_spapr_init(MachineState *machine) } g_free(filename); - spapr->entry_point = 0x100; - + /* FIXME: Should register things through the MachineState's qdev + * interface, this is a legacy from the sPAPREnvironment structure + * which predated MachineState but had a similar function */ vmstate_register(NULL, 0, &vmstate_spapr, spapr); register_savevm_live(NULL, "spapr/htab", -1, 1, &savevm_htab_handlers, spapr); @@ -1755,17 +1781,17 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, static char *spapr_get_kvm_type(Object *obj, Error **errp) { - sPAPRMachineState *sm = SPAPR_MACHINE(obj); + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); - return g_strdup(sm->kvm_type); + return g_strdup(spapr->kvm_type); } static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp) { - sPAPRMachineState *sm = SPAPR_MACHINE(obj); + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); - g_free(sm->kvm_type); - sm->kvm_type = g_strdup(value); + g_free(spapr->kvm_type); + spapr->kvm_type = g_strdup(value); } static void spapr_machine_initfn(Object *obj) @@ -1820,6 +1846,7 @@ static const TypeInfo spapr_machine_info = { .abstract = true, .instance_size = sizeof(sPAPRMachineState), .instance_init = spapr_machine_initfn, + .class_size = sizeof(sPAPRMachineClass), .class_init = spapr_machine_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_FW_PATH_PROVIDER }, @@ -1851,6 +1878,8 @@ static const TypeInfo spapr_machine_info = { static void spapr_compat_2_3(Object *obj) { + savevm_skip_section_footers(); + global_state_set_optional(); } static void spapr_compat_2_2(Object *obj) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index fda9e3590a..f626eb7b34 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -238,6 +238,7 @@ void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) static void rtas_event_log_queue(int log_type, void *data, bool exception) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1); g_assert(data); @@ -250,6 +251,7 @@ static void rtas_event_log_queue(int log_type, void *data, bool exception) static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, bool exception) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; /* we only queue EPOW events atm. */ @@ -278,6 +280,7 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, static bool rtas_event_log_contains(uint32_t event_mask, bool exception) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; /* we only queue EPOW events atm. */ @@ -314,6 +317,7 @@ static void spapr_init_v6hdr(struct rtas_event_log_v6 *v6hdr) static void spapr_init_maina(struct rtas_event_log_v6_maina *maina, int section_count) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct tm tm; int year; @@ -336,7 +340,7 @@ static void spapr_init_maina(struct rtas_event_log_v6_maina *maina, static void spapr_powerdown_req(Notifier *n, void *opaque) { - sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier); + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct rtas_error_log *hdr; struct rtas_event_log_v6 *v6hdr; struct rtas_event_log_v6_maina *maina; @@ -384,6 +388,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct hp_log_full *new_hp; struct rtas_error_log *hdr; struct rtas_event_log_v6 *v6hdr; @@ -453,7 +458,7 @@ void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc) spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_REMOVE); } -static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -508,7 +513,7 @@ out_no_events: rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } -static void event_scan(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void event_scan(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -548,7 +553,7 @@ out_no_events: rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } -void spapr_events_init(sPAPREnvironment *spapr) +void spapr_events_init(sPAPRMachineState *spapr) { QTAILQ_INIT(&spapr->pending_events); spapr->check_exception_irq = xics_alloc(spapr->icp, 0, 0, false); diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 4f76f1cbfe..652ddf6e37 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -84,9 +84,10 @@ static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index) return true; } -static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { + MachineState *machine = MACHINE(spapr); CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; target_ulong pte_index = args[1]; @@ -118,7 +119,7 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr, raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << page_shift) - 1); - if (raddr < spapr->ram_limit) { + if (raddr < machine->ram_size) { /* Regular RAM - should have WIMG=0010 */ if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { return H_PARAMETER; @@ -205,7 +206,7 @@ static RemoveResult remove_hpte(CPUPPCState *env, target_ulong ptex, return REMOVE_SUCCESS; } -static target_ulong h_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -252,7 +253,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, #define H_BULK_REMOVE_MAX_BATCH 4 -static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -299,7 +300,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -337,7 +338,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_read(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -367,7 +368,7 @@ static target_ulong h_read(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { /* FIXME: actually implement this */ @@ -506,7 +507,7 @@ static target_ulong deregister_dtl(CPUPPCState *env, target_ulong addr) return H_SUCCESS; } -static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong flags = args[0]; @@ -551,7 +552,7 @@ static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPREnvironment *spapr, return ret; } -static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_cede(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -567,7 +568,7 @@ static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_rtas(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_rtas(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong rtas_r3 = args[0]; @@ -579,7 +580,7 @@ static target_ulong h_rtas(PowerPCCPU *cpu, sPAPREnvironment *spapr, nret, rtas_r3 + 12 + 4*nargs); } -static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -603,7 +604,7 @@ static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_PARAMETER; } -static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -629,7 +630,7 @@ static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_PARAMETER; } -static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -698,14 +699,14 @@ static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { /* Nothing to do on emulation, KVM will trap this in the kernel */ return H_SUCCESS; } -static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { /* Nothing to do on emulation, KVM will trap this in the kernel */ @@ -788,7 +789,7 @@ static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu, return H_SUCCESS; } -static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong resource = args[1]; @@ -828,7 +829,7 @@ static void do_set_compat(void *arg) ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0) static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { @@ -921,7 +922,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, return H_SUCCESS; } - if (spapr_h_cas_compose_response(args[1], args[2])) { + if (spapr_h_cas_compose_response(spapr, args[1], args[2])) { qemu_system_reset_request(); } @@ -952,6 +953,8 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn) target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + if ((opcode <= MAX_HCALL_OPCODE) && ((opcode & 0x3) == 0)) { spapr_hcall_fn fn = papr_hypercall_table[opcode / 4]; diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 8cd9dba9ac..f61504e0c5 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -60,6 +60,20 @@ sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn) return NULL; } +static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce) +{ + switch (tce & SPAPR_TCE_RW) { + case SPAPR_TCE_FAULT: + return IOMMU_NONE; + case SPAPR_TCE_RO: + return IOMMU_RO; + case SPAPR_TCE_WO: + return IOMMU_WO; + default: /* SPAPR_TCE_RW */ + return IOMMU_RW; + } +} + /* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) @@ -82,7 +96,7 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, ret.iova = addr & page_mask; ret.translated_addr = tce & page_mask; ret.addr_mask = ~page_mask; - ret.perm = tce & IOMMU_RW; + ret.perm = spapr_tce_iommu_access_flags(tce); } trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm, ret.addr_mask); @@ -233,14 +247,14 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, entry.iova = ioba & page_mask; entry.translated_addr = tce & page_mask; entry.addr_mask = ~page_mask; - entry.perm = tce & IOMMU_RW; + entry.perm = spapr_tce_iommu_access_flags(tce); memory_region_notify_iommu(&tcet->iommu, entry); return H_SUCCESS; } static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { int i; @@ -267,9 +281,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, ioba &= page_mask; for (i = 0; i < npages; ++i, ioba += page_size) { - target_ulong off = (tce_list & ~SPAPR_TCE_RW) + - i * sizeof(target_ulong); - tce = ldq_be_phys(cs->as, off); + tce = ldq_be_phys(cs->as, tce_list + i * sizeof(target_ulong)); ret = put_tce_emu(tcet, ioba, tce); if (ret) { @@ -287,7 +299,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, return ret; } -static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { int i; @@ -326,7 +338,7 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, return ret; } -static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong liobn = args[0]; @@ -367,7 +379,7 @@ static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, return H_SUCCESS; } -static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong liobn = args[0]; diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index d4a6150527..a8f79d8003 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -23,6 +23,7 @@ * THE SOFTWARE. */ #include "hw/hw.h" +#include "hw/sysbus.h" #include "hw/pci/pci.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" @@ -35,6 +36,7 @@ #include "qemu/error-report.h" #include "qapi/qmp/qerror.h" +#include "hw/pci/pci_bridge.h" #include "hw/pci/pci_bus.h" #include "hw/ppc/spapr_drc.h" #include "sysemu/device_tree.h" @@ -50,6 +52,8 @@ #define RTAS_TYPE_MSI 1 #define RTAS_TYPE_MSIX 2 +#define FDT_NAME_MAX 128 + #define _FDT(exp) \ do { \ int ret = (exp); \ @@ -58,7 +62,7 @@ } \ } while (0) -sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid) +sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid) { sPAPRPHBState *sphb; @@ -72,7 +76,7 @@ sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid) return NULL; } -PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid, +PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, uint32_t config_addr) { sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid); @@ -93,7 +97,7 @@ static uint32_t rtas_pci_cfgaddr(uint32_t arg) return ((arg >> 20) & 0xf00) | (arg & 0xff); } -static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid, +static void finish_read_pci_config(sPAPRMachineState *spapr, uint64_t buid, uint32_t addr, uint32_t size, target_ulong rets) { @@ -123,7 +127,7 @@ static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid, rtas_st(rets, 1, val); } -static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -143,7 +147,7 @@ static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, finish_read_pci_config(spapr, buid, addr, size, rets); } -static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -161,7 +165,7 @@ static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, finish_read_pci_config(spapr, 0, addr, size, rets); } -static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid, +static void finish_write_pci_config(sPAPRMachineState *spapr, uint64_t buid, uint32_t addr, uint32_t size, uint32_t val, target_ulong rets) { @@ -189,7 +193,7 @@ static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -210,7 +214,7 @@ static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, finish_write_pci_config(spapr, buid, addr, size, val, rets); } -static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -259,7 +263,7 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix, } } -static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -377,7 +381,7 @@ out: } static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, @@ -418,13 +422,14 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, } static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { sPAPRPHBState *sphb; sPAPRPHBClass *spc; + PCIDevice *pdev; uint32_t addr, option; uint64_t buid; int ret; @@ -442,6 +447,12 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, goto param_error_exit; } + pdev = pci_find_device(PCI_HOST_BRIDGE(sphb)->bus, + (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); + if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + goto param_error_exit; + } + spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); if (!spc->eeh_set_option) { goto param_error_exit; @@ -456,7 +467,7 @@ param_error_exit: } static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -512,7 +523,7 @@ param_error_exit: } static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -556,7 +567,7 @@ param_error_exit: } static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -592,7 +603,7 @@ param_error_exit: } static void rtas_ibm_configure_pe(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -627,7 +638,7 @@ param_error_exit: /* To support it later */ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -718,6 +729,7 @@ static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin) static void spapr_msi_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); uint32_t irq = data; trace_spapr_pci_msi_write(addr, data, irq); @@ -742,6 +754,60 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &phb->iommu_as; } +static char *spapr_phb_vfio_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev) +{ + char *path = NULL, *buf = NULL, *host = NULL; + + /* Get the PCI VFIO host id */ + host = object_property_get_str(OBJECT(pdev), "host", NULL); + if (!host) { + goto err_out; + } + + /* Construct the path of the file that will give us the DT location */ + path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host); + g_free(host); + if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) { + goto err_out; + } + g_free(path); + + /* Construct and read from host device tree the loc-code */ + path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", buf); + g_free(buf); + if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) { + goto err_out; + } + return buf; + +err_out: + g_free(path); + return NULL; +} + +static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev) +{ + char *buf; + const char *devtype = "qemu"; + uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)))); + + if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + buf = spapr_phb_vfio_get_loc_code(sphb, pdev); + if (buf) { + return buf; + } + devtype = "vfio"; + } + /* + * For emulated devices and VFIO-failure case, make up + * the loc-code. + */ + buf = g_strdup_printf("%s_%s:%04x:%02x:%02x.%x", + devtype, pdev->name, sphb->index, busnr, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + return buf; +} + /* Macros to operate with address in OF binding to PCI */ #define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) #define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ @@ -786,7 +852,13 @@ typedef struct ResourceProps { * phys.hi = 0xYYXXXXZZ, where: * 0xYY = npt000ss * ||| | - * ||| +-- space code: 1 if IO region, 2 if MEM region + * ||| +-- space code + * ||| | + * ||| + 00 if configuration space + * ||| + 01 if IO region, + * ||| + 10 if 32-bit MEM region + * ||| + 11 if 64-bit MEM region + * ||| * ||+------ for non-relocatable IO: 1 if aliased * || for relocatable IO: 1 if below 64KB * || for MEM: 1 if below 1MB @@ -846,6 +918,8 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp) reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i))); if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { reg->phys_hi |= cpu_to_be32(b_ss(1)); + } else if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + reg->phys_hi |= cpu_to_be32(b_ss(3)); } else { reg->phys_hi |= cpu_to_be32(b_ss(2)); } @@ -870,13 +944,17 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp) rp->assigned_len = assigned_idx * sizeof(ResourceFields); } +static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, + PCIDevice *pdev); + static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, - int phb_index, int drc_index, - const char *drc_name) + sPAPRPHBState *sphb) { ResourceProps rp; bool is_bridge = false; - int pci_status; + int pci_status, err; + char *buf = NULL; + uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev); if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == PCI_HEADER_TYPE_BRIDGE) { @@ -891,8 +969,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, _FDT(fdt_setprop_cell(fdt, offset, "revision-id", pci_default_read_config(dev, PCI_REVISION_ID, 1))); _FDT(fdt_setprop_cell(fdt, offset, "class-code", - pci_default_read_config(dev, PCI_CLASS_DEVICE, 2) - << 8)); + pci_default_read_config(dev, PCI_CLASS_PROG, 3))); if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) { _FDT(fdt_setprop_cell(fdt, offset, "interrupts", pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1))); @@ -938,8 +1015,21 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, * processed by OF beforehand */ _FDT(fdt_setprop_string(fdt, offset, "name", "pci")); - _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name))); - _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)); + buf = spapr_phb_get_loc_code(sphb, dev); + if (!buf) { + error_report("Failed setting the ibm,loc-code"); + return -1; + } + + err = fdt_setprop_string(fdt, offset, "ibm,loc-code", buf); + g_free(buf); + if (err < 0) { + return err; + } + + if (drc_index) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)); + } _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", RESOURCE_CELLS_ADDRESS)); @@ -957,29 +1047,27 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, } /* create OF node for pci device and required OF DT properties */ -static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, - int drc_index, const char *drc_name, - int *dt_offset) +static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, + void *fdt, int node_offset) { - void *fdt; - int offset, ret, fdt_size; + int offset, ret; int slot = PCI_SLOT(dev->devfn); int func = PCI_FUNC(dev->devfn); - char nodename[512]; + char nodename[FDT_NAME_MAX]; - fdt = create_device_tree(&fdt_size); if (func != 0) { - sprintf(nodename, "pci@%d,%d", slot, func); + snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func); } else { - sprintf(nodename, "pci@%d", slot); + snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot); } - offset = fdt_add_subnode(fdt, 0, nodename); - ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index, - drc_name); - g_assert(!ret); + offset = fdt_add_subnode(fdt, node_offset, nodename); + ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb); - *dt_offset = offset; - return fdt; + g_assert(!ret); + if (ret) { + return 0; + } + return offset; } static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, @@ -989,22 +1077,21 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); DeviceState *dev = DEVICE(pdev); - int drc_index = drck->get_index(drc); - const char *drc_name = drck->get_name(drc); void *fdt = NULL; - int fdt_start_offset = 0; + int fdt_start_offset = 0, fdt_size; - /* boot-time devices get their device tree node created by SLOF, but for - * hotplugged devices we need QEMU to generate it so the guest can fetch - * it via RTAS - */ if (dev->hotplugged) { - fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name, - &fdt_start_offset); + fdt = create_device_tree(&fdt_size); + fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0); + if (!fdt_start_offset) { + error_setg(errp, "Failed to create pci child device tree node"); + goto out; + } } drck->attach(drc, DEVICE(pdev), fdt, fdt_start_offset, !dev->hotplugged, errp); +out: if (*errp) { g_free(fdt); } @@ -1046,6 +1133,20 @@ static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb, pdev->devfn); } +static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, + PCIDevice *pdev) +{ + sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); + sPAPRDRConnectorClass *drck; + + if (!drc) { + return 0; + } + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + return drck->get_index(drc); +} + static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp) { @@ -1110,6 +1211,7 @@ static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler, static void spapr_phb_realize(DeviceState *dev, Error **errp) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); SysBusDevice *s = SYS_BUS_DEVICE(dev); sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s); PCIHostState *phb = PCI_HOST_BRIDGE(s); @@ -1351,34 +1453,28 @@ static const VMStateDescription vmstate_spapr_pci_msi = { }, }; -static void spapr_pci_fill_msi_devs(gpointer key, gpointer value, - gpointer opaque) -{ - sPAPRPHBState *sphb = opaque; - - sphb->msi_devs[sphb->msi_devs_num].key = *(uint32_t *)key; - sphb->msi_devs[sphb->msi_devs_num].value = *(spapr_pci_msi *)value; - sphb->msi_devs_num++; -} - static void spapr_pci_pre_save(void *opaque) { sPAPRPHBState *sphb = opaque; - int msi_devs_num; + GHashTableIter iter; + gpointer key, value; + int i; if (sphb->msi_devs) { g_free(sphb->msi_devs); sphb->msi_devs = NULL; } - sphb->msi_devs_num = 0; - msi_devs_num = g_hash_table_size(sphb->msi); - if (!msi_devs_num) { + sphb->msi_devs_num = g_hash_table_size(sphb->msi); + if (!sphb->msi_devs_num) { return; } - sphb->msi_devs = g_malloc(msi_devs_num * sizeof(spapr_pci_msi_mig)); + sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig)); - g_hash_table_foreach(sphb->msi, spapr_pci_fill_msi_devs, sphb); - assert(sphb->msi_devs_num == msi_devs_num); + g_hash_table_iter_init(&iter, sphb->msi); + for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) { + sphb->msi_devs[i].key = *(uint32_t *) key; + sphb->msi_devs[i].value = *(spapr_pci_msi *) value; + } } static int spapr_pci_post_load(void *opaque, int version_id) @@ -1464,7 +1560,7 @@ static const TypeInfo spapr_phb_info = { } }; -PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) +PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index) { DeviceState *dev; @@ -1475,12 +1571,90 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) return PCI_HOST_BRIDGE(dev); } +typedef struct sPAPRFDT { + void *fdt; + int node_off; + sPAPRPHBState *sphb; +} sPAPRFDT; + +static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + PCIBus *sec_bus; + sPAPRFDT *p = opaque; + int offset; + sPAPRFDT s_fdt; + + offset = spapr_create_pci_child_dt(p->sphb, pdev, p->fdt, p->node_off); + if (!offset) { + error_report("Failed to create pci child device tree node"); + return; + } + + if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != + PCI_HEADER_TYPE_BRIDGE)) { + return; + } + + sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + if (!sec_bus) { + return; + } + + s_fdt.fdt = p->fdt; + s_fdt.node_off = offset; + s_fdt.sphb = p->sphb; + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + spapr_populate_pci_devices_dt, + &s_fdt); +} + +static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + unsigned int *bus_no = opaque; + unsigned int primary = *bus_no; + unsigned int subordinate = 0xff; + PCIBus *sec_bus = NULL; + + if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != + PCI_HEADER_TYPE_BRIDGE)) { + return; + } + + (*bus_no)++; + pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1); + pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1); + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); + + sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + if (!sec_bus) { + return; + } + + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1); + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + spapr_phb_pci_enumerate_bridge, bus_no); + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); +} + +static void spapr_phb_pci_enumerate(sPAPRPHBState *phb) +{ + PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; + unsigned int bus_no = 0; + + pci_for_each_device(bus, pci_bus_num(bus), + spapr_phb_pci_enumerate_bridge, + &bus_no); + +} + int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt) { int bus_off, i, j, ret; - char nodename[256]; + char nodename[FDT_NAME_MAX]; uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; const uint64_t mmiosize = memory_region_size(&phb->memwindow); const uint64_t w32max = (1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET; @@ -1514,9 +1688,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)}; uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7]; sPAPRTCETable *tcet; + PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; + sPAPRFDT s_fdt; /* Start populating the FDT */ - sprintf(nodename, "pci@%" PRIx64, phb->buid); + snprintf(nodename, FDT_NAME_MAX, "pci@%" PRIx64, phb->buid); bus_off = fdt_add_subnode(fdt, 0, nodename); if (bus_off < 0) { return bus_off; @@ -1563,6 +1739,18 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, tcet->liobn, tcet->bus_offset, tcet->nb_table << tcet->page_shift); + /* Walk the bridges and program the bus numbers*/ + spapr_phb_pci_enumerate(phb); + _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1)); + + /* Populate tree nodes with PCI devices attached */ + s_fdt.fdt = fdt; + s_fdt.node_off = bus_off; + s_fdt.sphb = phb; + pci_for_each_device(bus, pci_bus_num(bus), + spapr_populate_pci_devices_dt, + &s_fdt); + ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb), SPAPR_DR_CONNECTOR_TYPE_PCI); if (ret) { @@ -1631,6 +1819,7 @@ static int spapr_switch_one_vga(DeviceState *dev, void *opaque) void spapr_pci_switch_vga(bool big_endian) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPRPHBState *sphb; /* diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 99a1be5113..cca45ed312 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -19,6 +19,7 @@ #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" +#include "hw/pci/msix.h" #include "linux/vfio.h" #include "hw/vfio/vfio.h" @@ -71,9 +72,26 @@ static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) spapr_tce_get_iommu(tcet)); } +static void spapr_phb_vfio_eeh_reenable(sPAPRPHBVFIOState *svphb) +{ + struct vfio_eeh_pe_op op = { + .argsz = sizeof(op), + .op = VFIO_EEH_PE_ENABLE + }; + + vfio_container_ioctl(&svphb->phb.iommu_as, + svphb->iommugroupid, VFIO_EEH_PE_OP, &op); +} + static void spapr_phb_vfio_reset(DeviceState *qdev) { - /* Do nothing */ + /* + * The PE might be in frozen state. To reenable the EEH + * functionality on it will clean the frozen state, which + * ensures that the contained PCI devices will work properly + * after reboot. + */ + spapr_phb_vfio_eeh_reenable(SPAPR_PCI_VFIO_HOST_BRIDGE(qdev)); } static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, @@ -142,6 +160,49 @@ static int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state) return RTAS_OUT_SUCCESS; } +static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus, + PCIDevice *pdev, + void *opaque) +{ + /* Check if the device is VFIO PCI device */ + if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + return; + } + + /* + * The MSIx table will be cleaned out by reset. We need + * disable it so that it can be reenabled properly. Also, + * the cached MSIx table should be cleared as it's not + * reflecting the contents in hardware. + */ + if (msix_enabled(pdev)) { + uint16_t flags; + + flags = pci_host_config_read_common(pdev, + pdev->msix_cap + PCI_MSIX_FLAGS, + pci_config_size(pdev), 2); + flags &= ~PCI_MSIX_FLAGS_ENABLE; + pci_host_config_write_common(pdev, + pdev->msix_cap + PCI_MSIX_FLAGS, + pci_config_size(pdev), flags, 2); + } + + msix_reset(pdev); +} + +static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque) +{ + pci_for_each_device(bus, pci_bus_num(bus), + spapr_phb_vfio_eeh_clear_dev_msix, NULL); +} + +static void spapr_phb_vfio_eeh_pre_reset(sPAPRPHBState *sphb) +{ + PCIHostState *phb = PCI_HOST_BRIDGE(sphb); + + pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL); +} + static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) { sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); @@ -153,9 +214,11 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) op.op = VFIO_EEH_PE_RESET_DEACTIVATE; break; case RTAS_SLOT_RESET_HOT: + spapr_phb_vfio_eeh_pre_reset(sphb); op.op = VFIO_EEH_PE_RESET_HOT; break; case RTAS_SLOT_RESET_FUNDAMENTAL: + spapr_phb_vfio_eeh_pre_reset(sphb); op.op = VFIO_EEH_PE_RESET_FUNDAMENTAL; break; default: diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index fa28d43f81..2986f94f03 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -29,6 +29,7 @@ #include "sysemu/char.h" #include "hw/qdev.h" #include "sysemu/device_tree.h" +#include "sysemu/cpus.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" @@ -47,7 +48,7 @@ do { } while (0) #endif -static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPREnvironment *spapr, +static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPRMachineState *spapr, uint32_t drc_index) { sPAPRConfigureConnectorState *ccs = NULL; @@ -61,14 +62,14 @@ static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPREnvironment *spapr, return ccs; } -static void spapr_ccs_add(sPAPREnvironment *spapr, +static void spapr_ccs_add(sPAPRMachineState *spapr, sPAPRConfigureConnectorState *ccs) { g_assert(!spapr_ccs_find(spapr, ccs->drc_index)); QTAILQ_INSERT_HEAD(&spapr->ccs_list, ccs, next); } -static void spapr_ccs_remove(sPAPREnvironment *spapr, +static void spapr_ccs_remove(sPAPRMachineState *spapr, sPAPRConfigureConnectorState *ccs) { QTAILQ_REMOVE(&spapr->ccs_list, ccs, next); @@ -77,7 +78,7 @@ static void spapr_ccs_remove(sPAPREnvironment *spapr, void spapr_ccs_reset_hook(void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; sPAPRConfigureConnectorState *ccs, *ccs_tmp; QTAILQ_FOREACH_SAFE(ccs, &spapr->ccs_list, next, ccs_tmp) { @@ -85,7 +86,7 @@ void spapr_ccs_reset_hook(void *opaque) } } -static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_display_character(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -101,7 +102,7 @@ static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr, } } -static void rtas_power_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_power_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { @@ -113,7 +114,7 @@ static void rtas_power_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_system_reboot(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_system_reboot(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -127,7 +128,7 @@ static void rtas_system_reboot(PowerPCCPU *cpu, sPAPREnvironment *spapr, } static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -157,7 +158,7 @@ static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); } -static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPREnvironment *spapr, +static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -204,7 +205,7 @@ static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); } -static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_stop_self(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -227,7 +228,7 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr, } static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -262,7 +263,7 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, } static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -282,7 +283,7 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, } static void rtas_ibm_os_term(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -294,7 +295,7 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu, rtas_st(rets, 0, ret); } -static void rtas_set_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_power_level(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -319,7 +320,7 @@ static void rtas_set_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 1, 100); } -static void rtas_get_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_power_level(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -356,7 +357,7 @@ static bool sensor_type_is_dr(uint32_t sensor_type) return false; } -static void rtas_set_indicator(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -427,7 +428,7 @@ out_unimplemented: rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); } -static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -481,7 +482,7 @@ static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr, #define CC_WA_LEN 4096 static void rtas_ibm_configure_connector(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -601,7 +602,7 @@ static struct rtas_call { spapr_rtas_fn fn; } rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE]; -target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr, +target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { @@ -651,6 +652,8 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, { int ret; int i; + uint32_t lrdr_capacity[5]; + MachineState *machine = MACHINE(qdev_get_machine()); ret = fdt_add_mem_rsv(fdt, rtas_addr, rtas_size); if (ret < 0) { @@ -699,6 +702,19 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, } } + + lrdr_capacity[0] = cpu_to_be32(((uint64_t)machine->maxram_size) >> 32); + lrdr_capacity[1] = cpu_to_be32(machine->maxram_size & 0xffffffff); + lrdr_capacity[2] = 0; + lrdr_capacity[3] = cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE); + lrdr_capacity[4] = cpu_to_be32(max_cpus/smp_threads); + ret = qemu_fdt_setprop(fdt, "/rtas", "ibm,lrdr-capacity", lrdr_capacity, + sizeof(lrdr_capacity)); + if (ret < 0) { + fprintf(stderr, "Couldn't add ibm,lrdr-capacity rtas property\n"); + return ret; + } + return 0; } diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c index 9da3746e7c..d20b8f270c 100644 --- a/hw/ppc/spapr_rtc.c +++ b/hw/ppc/spapr_rtc.c @@ -76,7 +76,7 @@ int spapr_rtc_import_offset(DeviceState *dev, int64_t legacy_offset) return 0; } -static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -106,7 +106,7 @@ static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 7, ns); } -static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 8b59b64b7e..c51eb8e244 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -160,7 +160,7 @@ static int vio_make_devnode(VIOsPAPRDevice *dev, /* * CRQ handling */ -static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -218,7 +218,7 @@ static target_ulong free_crq(VIOsPAPRDevice *dev) return H_SUCCESS; } -static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -232,7 +232,7 @@ static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, return free_crq(dev); } -static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -255,7 +255,7 @@ static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_HARDWARE; } -static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -333,7 +333,7 @@ void spapr_vio_set_bypass(VIOsPAPRDevice *dev, bool bypass) dev->tcet->bypass = bypass; } -static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -364,7 +364,7 @@ static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_quiesce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_quiesce(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -426,6 +426,7 @@ static void spapr_vio_busdev_reset(DeviceState *qdev) static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev; VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); char *id; @@ -491,7 +492,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) pc->realize(dev, errp); } -static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c index 439732f7ab..de86f7c64c 100644 --- a/hw/ppc/virtex_ml507.c +++ b/hw/ppc/virtex_ml507.c @@ -197,7 +197,6 @@ static int xilinx_load_device_tree(hwaddr addr, static void virtex_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; hwaddr initrd_base = 0; @@ -214,11 +213,11 @@ static void virtex_init(MachineState *machine) int i; /* init CPUs */ - if (cpu_model == NULL) { - cpu_model = "440-Xilinx"; + if (machine->cpu_model == NULL) { + machine->cpu_model = "440-Xilinx"; } - cpu = ppc440_init_xilinx(&ram_size, 1, cpu_model, 400000000); + cpu = ppc440_init_xilinx(&ram_size, 1, machine->cpu_model, 400000000); env = &cpu->env; qemu_register_reset(main_cpu_reset, cpu); diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index d631337e11..e345a6e9b8 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -1316,8 +1316,8 @@ static int virtio_ccw_add_irqfd(VirtioCcwDevice *dev, int n) VirtQueue *vq = virtio_get_queue(vdev, n); EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); - return kvm_irqchip_add_irqfd_notifier(kvm_state, notifier, NULL, - dev->routes.gsi[n]); + return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, notifier, NULL, + dev->routes.gsi[n]); } static void virtio_ccw_remove_irqfd(VirtioCcwDevice *dev, int n) @@ -1327,8 +1327,8 @@ static void virtio_ccw_remove_irqfd(VirtioCcwDevice *dev, int n) EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); int ret; - ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, notifier, - dev->routes.gsi[n]); + ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, notifier, + dev->routes.gsi[n]); assert(ret == 0); } diff --git a/hw/vfio/common.c b/hw/vfio/common.c index b1045da857..85ee9b005e 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -772,11 +772,19 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; + VFIOGuestIOMMU *giommu, *tmp; if (container->iommu_data.release) { container->iommu_data.release(container); } QLIST_REMOVE(container, next); + + QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { + memory_region_unregister_iommu_notifier(&giommu->n); + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } + trace_vfio_disconnect_container(container->fd); close(container->fd); g_free(container); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index e0e339a534..2ed877fe9f 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -597,7 +597,7 @@ static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg, return; } - if (kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->kvm_interrupt, + if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, NULL, virq) < 0) { kvm_irqchip_release_virq(kvm_state, virq); event_notifier_cleanup(&vector->kvm_interrupt); @@ -609,8 +609,8 @@ static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg, static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector) { - kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->kvm_interrupt, - vector->virq); + kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, + vector->virq); kvm_irqchip_release_virq(kvm_state, vector->virq); vector->virq = -1; event_notifier_cleanup(&vector->kvm_interrupt); @@ -939,7 +939,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) }; uint64_t size; off_t off = 0; - size_t bytes; + ssize_t bytes; if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info)) { error_report("vfio: Error getting ROM info: %m"); @@ -2252,6 +2252,33 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev) vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; + /* + * Test the size of the pba_offset variable and catch if it extends outside + * of the specified BAR. If it is the case, we need to apply a hardware + * specific quirk if the device is known or we have a broken configuration. + */ + if (vdev->msix->pba_offset >= + vdev->bars[vdev->msix->pba_bar].region.size) { + + PCIDevice *pdev = &vdev->pdev; + uint16_t vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); + uint16_t device = pci_get_word(pdev->config + PCI_DEVICE_ID); + + /* + * Chelsio T5 Virtual Function devices are encoded as 0x58xx for T5 + * adapters. The T5 hardware returns an incorrect value of 0x8000 for + * the VF PBA offset while the BAR itself is only 8k. The correct value + * is 0x1000, so we hard code that here. + */ + if (vendor == PCI_VENDOR_ID_CHELSIO && (device & 0xff00) == 0x5800) { + vdev->msix->pba_offset = 0x1000; + } else { + error_report("vfio: Hardware reports invalid configuration, " + "MSIX PBA outside of specified BAR"); + return -EINVAL; + } + } + trace_vfio_early_setup_msix(vdev->vbasedev.name, pos, vdev->msix->table_bar, vdev->msix->table_offset, @@ -2388,7 +2415,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) * potentially insert a direct-mapped subregion before and after it. */ if (vdev->msix && vdev->msix->table_bar == nr) { - size = vdev->msix->table_offset & qemu_host_page_mask; + size = vdev->msix->table_offset & qemu_real_host_page_mask; } strncat(name, " mmap", sizeof(name) - strlen(name) - 1); @@ -2401,8 +2428,9 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) if (vdev->msix && vdev->msix->table_bar == nr) { uint64_t start; - start = HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + - (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); + start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + + (vdev->msix->entries * + PCI_MSIX_ENTRY_SIZE)); size = start < bar->region.size ? bar->region.size - start : 0; strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 5c678b914e..60365d1279 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -26,6 +26,7 @@ #include "hw/sysbus.h" #include "trace.h" #include "hw/platform-bus.h" +#include "sysemu/kvm.h" /* * Functions used whatever the injection method @@ -51,6 +52,7 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, intp->pin = info.index; intp->flags = info.flags; intp->state = VFIO_IRQ_INACTIVE; + intp->kvm_accel = false; sysbus_init_irq(sbdev, &intp->qemuirq); @@ -61,6 +63,13 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, error_report("vfio: Error: trigger event_notifier_init failed "); return NULL; } + /* Get an eventfd for resample/unmask */ + ret = event_notifier_init(&intp->unmask, 0); + if (ret) { + g_free(intp); + error_report("vfio: Error: resamplefd event_notifier_init failed"); + return NULL; + } QLIST_INSERT_HEAD(&vdev->intp_list, intp, next); return intp; @@ -315,6 +324,94 @@ static int vfio_start_eventfd_injection(VFIOINTp *intp) return ret; } +/* + * Functions used for irqfd + */ + +/** + * vfio_set_resample_eventfd - sets the resamplefd for an IRQ + * @intp: the IRQ struct handle + * programs the VFIO driver to unmask this IRQ when the + * intp->unmask eventfd is triggered + */ +static int vfio_set_resample_eventfd(VFIOINTp *intp) +{ + VFIODevice *vbasedev = &intp->vdev->vbasedev; + struct vfio_irq_set *irq_set; + int argsz, ret; + int32_t *pfd; + + argsz = sizeof(*irq_set) + sizeof(*pfd); + irq_set = g_malloc0(argsz); + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = intp->pin; + irq_set->start = 0; + irq_set->count = 1; + pfd = (int32_t *)&irq_set->data; + *pfd = event_notifier_get_fd(&intp->unmask); + qemu_set_fd_handler(*pfd, NULL, NULL, NULL); + ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + g_free(irq_set); + if (ret < 0) { + error_report("vfio: Failed to set resample eventfd: %m"); + } + return ret; +} + +static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq) +{ + VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev); + VFIOINTp *intp; + + if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() || + !vdev->irqfd_allowed) { + return; + } + + QLIST_FOREACH(intp, &vdev->intp_list, next) { + if (intp->qemuirq == irq) { + break; + } + } + assert(intp); + + /* Get to a known interrupt state */ + qemu_set_fd_handler(event_notifier_get_fd(&intp->interrupt), + NULL, NULL, vdev); + + vfio_mask_single_irqindex(&vdev->vbasedev, intp->pin); + qemu_set_irq(intp->qemuirq, 0); + + if (kvm_irqchip_add_irqfd_notifier(kvm_state, &intp->interrupt, + &intp->unmask, irq) < 0) { + goto fail_irqfd; + } + + if (vfio_set_trigger_eventfd(intp, NULL) < 0) { + goto fail_vfio; + } + if (vfio_set_resample_eventfd(intp) < 0) { + goto fail_vfio; + } + + /* Let's resume injection with irqfd setup */ + vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin); + + intp->kvm_accel = true; + + trace_vfio_platform_start_irqfd_injection(intp->pin, + event_notifier_get_fd(&intp->interrupt), + event_notifier_get_fd(&intp->unmask)); + return; +fail_vfio: + kvm_irqchip_remove_irqfd_notifier(kvm_state, &intp->interrupt, irq); +fail_irqfd: + vfio_start_eventfd_injection(intp); + vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin); + return; +} + /* VFIO skeleton */ static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev) @@ -584,17 +681,20 @@ static Property vfio_platform_dev_properties[] = { DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true), DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, mmap_timeout, 1100), + DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true), DEFINE_PROP_END_OF_LIST(), }; static void vfio_platform_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass); dc->realize = vfio_platform_realize; dc->props = vfio_platform_dev_properties; dc->vmsd = &vfio_platform_vmstate; dc->desc = "VFIO-based platform device assignment"; + sbc->connect_irq_notifier = vfio_start_irqfd_injection; set_bit(DEVICE_CATEGORY_MISC, dc->categories); } diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c index 35891856ee..07fd69c69e 100644 --- a/hw/virtio/dataplane/vring.c +++ b/hw/virtio/dataplane/vring.c @@ -153,22 +153,20 @@ bool vring_should_notify(VirtIODevice *vdev, Vring *vring) return true; } - return vring_need_event(vring_used_event(&vring->vr), new, old); + return vring_need_event(virtio_tswap16(vdev, vring_used_event(&vring->vr)), + new, old); } -static int get_desc(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem, +static int get_desc(Vring *vring, VirtQueueElement *elem, struct vring_desc *desc) { unsigned *num; struct iovec *iov; hwaddr *addr; MemoryRegion *mr; - int is_write = virtio_tswap16(vdev, desc->flags) & VRING_DESC_F_WRITE; - uint32_t len = virtio_tswap32(vdev, desc->len); - uint64_t desc_addr = virtio_tswap64(vdev, desc->addr); - if (is_write) { + if (desc->flags & VRING_DESC_F_WRITE) { num = &elem->in_num; iov = &elem->in_sg[*num]; addr = &elem->in_addr[*num]; @@ -192,17 +190,18 @@ static int get_desc(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem, } /* TODO handle non-contiguous memory across region boundaries */ - iov->iov_base = vring_map(&mr, desc_addr, len, is_write); + iov->iov_base = vring_map(&mr, desc->addr, desc->len, + desc->flags & VRING_DESC_F_WRITE); if (!iov->iov_base) { error_report("Failed to map descriptor addr %#" PRIx64 " len %u", - (uint64_t)desc_addr, len); + (uint64_t)desc->addr, desc->len); return -EFAULT; } /* The MemoryRegion is looked up again and unref'ed later, leave the * ref in place. */ - iov->iov_len = len; - *addr = desc_addr; + iov->iov_len = desc->len; + *addr = desc->addr; *num += 1; return 0; } @@ -224,23 +223,21 @@ static int get_indirect(VirtIODevice *vdev, Vring *vring, struct vring_desc desc; unsigned int i = 0, count, found = 0; int ret; - uint32_t len = virtio_tswap32(vdev, indirect->len); - uint64_t addr = virtio_tswap64(vdev, indirect->addr); /* Sanity check */ - if (unlikely(len % sizeof(desc))) { + if (unlikely(indirect->len % sizeof(desc))) { error_report("Invalid length in indirect descriptor: " "len %#x not multiple of %#zx", - len, sizeof(desc)); + indirect->len, sizeof(desc)); vring->broken = true; return -EFAULT; } - count = len / sizeof(desc); + count = indirect->len / sizeof(desc); /* Buffers are chained via a 16 bit next field, so * we can have at most 2^16 of these. */ if (unlikely(count > USHRT_MAX + 1)) { - error_report("Indirect buffer length too big: %d", len); + error_report("Indirect buffer length too big: %d", indirect->len); vring->broken = true; return -EFAULT; } @@ -251,12 +248,12 @@ static int get_indirect(VirtIODevice *vdev, Vring *vring, /* Translate indirect descriptor */ desc_ptr = vring_map(&mr, - addr + found * sizeof(desc), + indirect->addr + found * sizeof(desc), sizeof(desc), false); if (!desc_ptr) { error_report("Failed to map indirect descriptor " "addr %#" PRIx64 " len %zu", - (uint64_t)addr + found * sizeof(desc), + (uint64_t)indirect->addr + found * sizeof(desc), sizeof(desc)); vring->broken = true; return -EFAULT; @@ -274,20 +271,19 @@ static int get_indirect(VirtIODevice *vdev, Vring *vring, return -EFAULT; } - if (unlikely(virtio_tswap16(vdev, desc.flags) - & VRING_DESC_F_INDIRECT)) { + if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) { error_report("Nested indirect descriptor"); vring->broken = true; return -EFAULT; } - ret = get_desc(vdev, vring, elem, &desc); + ret = get_desc(vring, elem, &desc); if (ret < 0) { vring->broken |= (ret == -EFAULT); return ret; } - i = virtio_tswap16(vdev, desc.next); - } while (virtio_tswap16(vdev, desc.flags) & VRING_DESC_F_NEXT); + i = desc.next; + } while (desc.flags & VRING_DESC_F_NEXT); return 0; } @@ -388,7 +384,7 @@ int vring_pop(VirtIODevice *vdev, Vring *vring, /* Ensure descriptor is loaded before accessing fields */ barrier(); - if (virtio_tswap16(vdev, desc.flags) & VRING_DESC_F_INDIRECT) { + if (desc.flags & VRING_DESC_F_INDIRECT) { ret = get_indirect(vdev, vring, elem, &desc); if (ret < 0) { goto out; @@ -396,18 +392,19 @@ int vring_pop(VirtIODevice *vdev, Vring *vring, continue; } - ret = get_desc(vdev, vring, elem, &desc); + ret = get_desc(vring, elem, &desc); if (ret < 0) { goto out; } - i = virtio_tswap16(vdev, desc.next); - } while (virtio_tswap16(vdev, desc.flags) & VRING_DESC_F_NEXT); + i = desc.next; + } while (desc.flags & VRING_DESC_F_NEXT); /* On success, increment avail index. */ vring->last_avail_idx++; if (virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { - vring_avail_event(&vring->vr) = vring->last_avail_idx; + vring_avail_event(&vring->vr) = + virtio_tswap16(vdev, vring->last_avail_idx); } return head; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 6a0174e9cc..ccca2b6f3b 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -443,11 +443,89 @@ static const MemoryRegionOps virtio_pci_config_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; +/* Below are generic functions to do memcpy from/to an address space, + * without byteswaps, with input validation. + * + * As regular address_space_* APIs all do some kind of byteswap at least for + * some host/target combinations, we are forced to explicitly convert to a + * known-endianness integer value. + * It doesn't really matter which endian format to go through, so the code + * below selects the endian that causes the least amount of work on the given + * host. + * + * Note: host pointer must be aligned. + */ +static +void virtio_address_space_write(AddressSpace *as, hwaddr addr, + const uint8_t *buf, int len) +{ + uint32_t val; + + /* address_space_* APIs assume an aligned address. + * As address is under guest control, handle illegal values. + */ + addr &= ~(len - 1); + + /* Make sure caller aligned buf properly */ + assert(!(((uintptr_t)buf) & (len - 1))); + + switch (len) { + case 1: + val = pci_get_byte(buf); + address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); + break; + case 2: + val = pci_get_word(buf); + address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); + break; + case 4: + val = pci_get_long(buf); + address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); + break; + default: + /* As length is under guest control, handle illegal values. */ + break; + } +} + +static void +virtio_address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len) +{ + uint32_t val; + + /* address_space_* APIs assume an aligned address. + * As address is under guest control, handle illegal values. + */ + addr &= ~(len - 1); + + /* Make sure caller aligned buf properly */ + assert(!(((uintptr_t)buf) & (len - 1))); + + switch (len) { + case 1: + val = address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); + pci_set_byte(buf, val); + break; + case 2: + val = address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); + pci_set_word(buf, val); + break; + case 4: + val = address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); + pci_set_long(buf, val); + break; + default: + /* As length is under guest control, handle illegal values. */ + break; + } +} + static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t val, int len) { VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev); VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + struct virtio_pci_cfg_cap *cfg; pci_default_write_config(pci_dev, address, val, len); @@ -456,6 +534,49 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, virtio_pci_stop_ioeventfd(proxy); virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK); } + + if (proxy->config_cap && + ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap, + pci_cfg_data), + sizeof cfg->pci_cfg_data)) { + uint32_t off; + uint32_t len; + + cfg = (void *)(proxy->pci_dev.config + proxy->config_cap); + off = le32_to_cpu(cfg->cap.offset); + len = le32_to_cpu(cfg->cap.length); + + if (len <= sizeof cfg->pci_cfg_data) { + virtio_address_space_write(&proxy->modern_as, off, + cfg->pci_cfg_data, len); + } + } +} + +static uint32_t virtio_read_config(PCIDevice *pci_dev, + uint32_t address, int len) +{ + VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev); + struct virtio_pci_cfg_cap *cfg; + + if (proxy->config_cap && + ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap, + pci_cfg_data), + sizeof cfg->pci_cfg_data)) { + uint32_t off; + uint32_t len; + + cfg = (void *)(proxy->pci_dev.config + proxy->config_cap); + off = le32_to_cpu(cfg->cap.offset); + len = le32_to_cpu(cfg->cap.length); + + if (len <= sizeof cfg->pci_cfg_data) { + virtio_address_space_read(&proxy->modern_as, off, + cfg->pci_cfg_data, len); + } + } + + return pci_default_read_config(pci_dev, address, len); } static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, @@ -495,7 +616,7 @@ static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, VirtQueue *vq = virtio_get_queue(vdev, queue_no); EventNotifier *n = virtio_queue_get_guest_notifier(vq); int ret; - ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, NULL, irqfd->virq); + ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); return ret; } @@ -509,7 +630,7 @@ static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; int ret; - ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq); + ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); assert(ret == 0); } @@ -942,7 +1063,7 @@ static int virtio_pci_query_nvectors(DeviceState *d) return proxy->nvectors; } -static void virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, +static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, struct virtio_pci_cap *cap) { PCIDevice *dev = &proxy->pci_dev; @@ -954,6 +1075,8 @@ static void virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, assert(cap->cap_len >= sizeof *cap); memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len, cap->cap_len - PCI_CAP_FLAGS); + + return offset; } static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, @@ -1329,6 +1452,11 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) .notify_off_multiplier = cpu_to_le32(QEMU_VIRTIO_PCI_QUEUE_MEM_MULT), }; + struct virtio_pci_cfg_cap cfg = { + .cap.cap_len = sizeof cfg, + .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG, + }; + struct virtio_pci_cfg_cap *cfg_mask; /* TODO: add io access for speed */ @@ -1338,11 +1466,19 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) virtio_pci_modern_region_map(proxy, &proxy->isr, &cap); virtio_pci_modern_region_map(proxy, &proxy->device, &cap); virtio_pci_modern_region_map(proxy, &proxy->notify, ¬ify.cap); + pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_PREFETCH | PCI_BASE_ADDRESS_MEM_TYPE_64, &proxy->modern_bar); + + proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap); + cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap); + pci_set_byte(&cfg_mask->cap.bar, ~0x0); + pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0); + pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0); + pci_set_long(cfg_mask->pci_cfg_data, ~0x0); } if (proxy->nvectors && @@ -1354,6 +1490,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) } proxy->pci_dev.config_write = virtio_write_config; + proxy->pci_dev.config_read = virtio_read_config; if (legacy) { size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) @@ -1424,6 +1561,15 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) 2 * QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * VIRTIO_QUEUE_MAX); + memory_region_init_alias(&proxy->modern_cfg, + OBJECT(proxy), + "virtio-pci-cfg", + &proxy->modern_bar, + 0, + memory_region_size(&proxy->modern_bar)); + + address_space_init(&proxy->modern_as, &proxy->modern_cfg, "virtio-pci-cfg-as"); + virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy); if (k->realize) { k->realize(proxy, errp); @@ -1432,7 +1578,10 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) static void virtio_pci_exit(PCIDevice *pci_dev) { + VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev); + msix_uninit_exclusive_bar(pci_dev); + address_space_destroy(&proxy->modern_as); } static void virtio_pci_reset(DeviceState *qdev) diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 05d9d243f6..b6c442f522 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -112,9 +112,12 @@ struct VirtIOPCIProxy { VirtIOPCIRegion device; VirtIOPCIRegion notify; MemoryRegion modern_bar; + MemoryRegion modern_cfg; + AddressSpace modern_as; uint32_t legacy_io_bar; uint32_t msix_bar; uint32_t modern_mem_bar; + int config_cap; uint32_t flags; uint32_t class_code; uint32_t nvectors; diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 57d8ef13e2..dd9d5e6aad 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -166,6 +166,14 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); void block_job_yield(BlockJob *job); /** + * block_job_release: + * @bs: The block device. + * + * Release job resources when an error occurred or job completed. + */ +void block_job_release(BlockDriverState *bs); + +/** * block_job_completed: * @job: The job being completed. * @ret: The status code. diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 8999634981..ea6a9a667c 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -183,10 +183,13 @@ extern unsigned long reserved_va; /* ??? These should be the larger of uintptr_t and target_ulong. */ extern uintptr_t qemu_real_host_page_size; +extern uintptr_t qemu_real_host_page_mask; extern uintptr_t qemu_host_page_size; extern uintptr_t qemu_host_page_mask; #define HOST_PAGE_ALIGN(addr) (((addr) + qemu_host_page_size - 1) & qemu_host_page_mask) +#define REAL_HOST_PAGE_ALIGN(addr) (((addr) + qemu_real_host_page_size - 1) & \ + qemu_real_host_page_mask) /* same as PROT_xxx */ #define PAGE_READ 0x0001 diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index d678114cb2..2e74760ade 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -365,4 +365,7 @@ static inline bool cpu_can_do_io(CPUState *cpu) return cpu->can_do_io != 0; } +#if !defined(CONFIG_USER_ONLY) +void migration_bitmap_extend(ram_addr_t old, ram_addr_t new); +#endif #endif diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index ac24bbe9a3..345fd8d92b 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -25,6 +25,7 @@ #include "hw/acpi/cpu_hotplug.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/acpi_dev_interface.h" +#include "hw/acpi/tco.h" typedef struct ICH9LPCPMRegs { /* @@ -55,10 +56,15 @@ typedef struct ICH9LPCPMRegs { uint8_t disable_s4; uint8_t s4_val; uint8_t smm_enabled; + bool enable_tco; + TCOIORegs tco_regs; } ICH9LPCPMRegs; void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, - bool smm_enabled, qemu_irq sci_irq); + bool smm_enabled, + bool enable_tco, + qemu_irq sci_irq); + void ich9_pm_iospace_update(ICH9LPCPMRegs *pm, uint32_t pm_io_base); extern const VMStateDescription vmstate_ich9_pm; diff --git a/include/hw/acpi/tco.h b/include/hw/acpi/tco.h new file mode 100644 index 0000000000..c63afc8ca3 --- /dev/null +++ b/include/hw/acpi/tco.h @@ -0,0 +1,82 @@ +/* + * QEMU ICH9 TCO emulation + * + * Copyright (c) 2015 Paulo Alcantara <pcacjr@zytor.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef HW_ACPI_TCO_H +#define HW_ACPI_TCO_H + +#include "qemu/typedefs.h" +#include "qemu-common.h" + +/* As per ICH9 spec, the internal timer has an error of ~0.6s on every tick */ +#define TCO_TICK_NSEC 600000000LL + +/* TCO I/O register offsets */ +enum { + TCO_RLD = 0x00, + TCO_DAT_IN = 0x02, + TCO_DAT_OUT = 0x03, + TCO1_STS = 0x04, + TCO2_STS = 0x06, + TCO1_CNT = 0x08, + TCO2_CNT = 0x0a, + TCO_MESSAGE1 = 0x0c, + TCO_MESSAGE2 = 0x0d, + TCO_WDCNT = 0x0e, + SW_IRQ_GEN = 0x10, + TCO_TMR = 0x12, +}; + +/* TCO I/O register control/status bits */ +enum { + SW_TCO_SMI = 1 << 1, + TCO_INT_STS = 1 << 2, + TCO_LOCK = 1 << 12, + TCO_TMR_HLT = 1 << 11, + TCO_TIMEOUT = 1 << 3, + TCO_SECOND_TO_STS = 1 << 1, + TCO_BOOT_STS = 1 << 2, +}; + +/* TCO I/O registers mask bits */ +enum { + TCO_RLD_MASK = 0x3ff, + TCO1_STS_MASK = 0xe870, + TCO2_STS_MASK = 0xfff8, + TCO1_CNT_MASK = 0xfeff, + TCO_TMR_MASK = 0x3ff, +}; + +typedef struct TCOIORegs { + struct { + uint16_t rld; + uint8_t din; + uint8_t dout; + uint16_t sts1; + uint16_t sts2; + uint16_t cnt1; + uint16_t cnt2; + uint8_t msg1; + uint8_t msg2; + uint8_t wdcnt; + uint16_t tmr; + } tco; + uint8_t sw_irq_gen; + + QEMUTimer *tco_timer; + int64_t expire_time; + uint8_t timeouts_no; + + MemoryRegion io; +} TCOIORegs; + +/* tco.c */ +void acpi_pm_tco_init(TCOIORegs *tr, MemoryRegion *parent); + +extern const VMStateDescription vmstate_tco_io_sts; + +#endif /* HW_ACPI_TCO_H */ diff --git a/include/hw/boards.h b/include/hw/boards.h index 6379901528..2aec9cbb12 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -99,7 +99,8 @@ struct MachineClass { no_floppy:1, no_cdrom:1, no_sdcard:1, - has_dynamic_sysbus:1; + has_dynamic_sysbus:1, + no_tco:1; int is_default; const char *default_machine_opts; const char *default_boot_order; diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h index b317a481c8..b9d2b04b6e 100644 --- a/include/hw/i386/ich9.h +++ b/include/hw/i386/ich9.h @@ -17,9 +17,12 @@ void ich9_lpc_set_irq(void *opaque, int irq_num, int level); int ich9_lpc_map_irq(PCIDevice *pci_dev, int intx); PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin); -void ich9_lpc_pm_init(PCIDevice *pci_lpc, bool smm_enabled); +void ich9_lpc_pm_init(PCIDevice *pci_lpc, bool smm_enabled, bool enable_tco); I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base); +void ich9_generate_smi(void); +void ich9_generate_nmi(void); + #define ICH9_CC_SIZE (16 * 1024) /* 16KB */ #define TYPE_ICH9_LPC_DEVICE "ICH9-LPC" @@ -43,6 +46,11 @@ typedef struct ICH9LPCState { ICH9LPCPMRegs pm; uint32_t sci_level; /* track sci level */ + /* 2.24 Pin Straps */ + struct { + bool spkr_hi; + } pin_strap; + /* 10.1 Chipset Configuration registers(Memory Space) which is pointed by RCBA */ uint8_t chip_config[ICH9_CC_SIZE]; @@ -90,6 +98,9 @@ Object *ich9_lpc_find(void); #define ICH9_CC_DIR_MASK 0x7 #define ICH9_CC_OIC 0x31FF #define ICH9_CC_OIC_AEN 0x1 +#define ICH9_CC_GCS 0x3410 +#define ICH9_CC_GCS_DEFAULT 0x00000020 +#define ICH9_CC_GCS_NO_REBOOT (1 << 5) /* D28:F[0-5] */ #define ICH9_PCIE_DEV 28 @@ -186,7 +197,10 @@ Object *ich9_lpc_find(void); #define ICH9_PMIO_GPE0_LEN 16 #define ICH9_PMIO_SMI_EN 0x30 #define ICH9_PMIO_SMI_EN_APMC_EN (1 << 5) +#define ICH9_PMIO_SMI_EN_TCO_EN (1 << 13) #define ICH9_PMIO_SMI_STS 0x34 +#define ICH9_PMIO_TCO_RLD 0x60 +#define ICH9_PMIO_TCO_LEN 32 /* FADT ACPI_ENABLE/ACPI_DISABLE */ #define ICH9_APM_ACPI_ENABLE 0x2 diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 786a1d511c..15e3352967 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -88,6 +88,7 @@ typedef struct PcPciInfo { #define ACPI_PM_PROP_PM_IO_BASE "pm_io_base" #define ACPI_PM_PROP_GPE0_BLK "gpe0_blk" #define ACPI_PM_PROP_GPE0_BLK_LEN "gpe0_blk_len" +#define ACPI_PM_PROP_TCO_ENABLED "enable_tco" struct PcGuestInfo { bool isapc_ram_fw; @@ -198,13 +199,12 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, ISADevice **rtc_state, bool create_fdctrl, - ISADevice **floppy, bool no_vmport, uint32 hpet_irqs); void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd); void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, const char *boot_device, MachineState *machine, - ISADevice *floppy, BusState *ide0, BusState *ide1, + BusState *ide0, BusState *ide1, ISADevice *s); void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus); void pc_pci_device_init(PCIBus *pci_bus); @@ -293,7 +293,12 @@ int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_COMPAT_2_3 \ - HW_COMPAT_2_3 + HW_COMPAT_2_3 \ + {\ + .driver = TYPE_X86_CPU,\ + .property = "arat",\ + .value = "off",\ + }, #define PC_COMPAT_2_2 \ PC_COMPAT_2_3 \ diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 9dca38837b..5322b560eb 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -119,21 +119,23 @@ struct sPAPRPHBVFIOState { static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + return xics_get_qirq(spapr->icp, phb->lsi_table[pin].irq); } -PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index); +PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index); int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt); -void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr); +void spapr_pci_msi_init(sPAPRMachineState *spapr, hwaddr addr); void spapr_pci_rtas_init(void); -sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid); -PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid, +sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid); +PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, uint32_t config_addr); #endif /* __HW_SPAPR_PCI_H__ */ diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 49c062b8ce..d98e6c915d 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -114,6 +114,8 @@ #define PCI_VENDOR_ID_ENSONIQ 0x1274 #define PCI_DEVICE_ID_ENSONIQ_ES1370 0x5000 +#define PCI_VENDOR_ID_CHELSIO 0x1425 + #define PCI_VENDOR_ID_FREESCALE 0x1957 #define PCI_DEVICE_ID_MPC8533E 0x0030 diff --git a/include/hw/pci/pci_regs.h b/include/hw/pci/pci_regs.h index 57e8c80c30..ba8cbe9278 100644 --- a/include/hw/pci/pci_regs.h +++ b/include/hw/pci/pci_regs.h @@ -1,719 +1 @@ -/* - * pci_regs.h - * - * PCI standard defines - * Copyright 1994, Drew Eckhardt - * Copyright 1997--1999 Martin Mares <mj@ucw.cz> - * - * For more information, please consult the following manuals (look at - * http://www.pcisig.com/ for how to get them): - * - * PCI BIOS Specification - * PCI Local Bus Specification - * PCI to PCI Bridge Specification - * PCI System Design Guide - * - * For hypertransport information, please consult the following manuals - * from http://www.hypertransport.org - * - * The Hypertransport I/O Link Specification - */ - -#ifndef LINUX_PCI_REGS_H -#define LINUX_PCI_REGS_H - -/* - * Under PCI, each device has 256 bytes of configuration address space, - * of which the first 64 bytes are standardized as follows: - */ -#define PCI_VENDOR_ID 0x00 /* 16 bits */ -#define PCI_DEVICE_ID 0x02 /* 16 bits */ -#define PCI_COMMAND 0x04 /* 16 bits */ -#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */ -#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */ -#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */ -#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */ -#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ -#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ -#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ -#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ -#define PCI_COMMAND_SERR 0x100 /* Enable SERR */ -#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ -#define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */ - -#define PCI_STATUS 0x06 /* 16 bits */ -#define PCI_STATUS_INTERRUPT 0x08 /* Interrupt status */ -#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ -#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ -#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ -#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ -#define PCI_STATUS_PARITY 0x100 /* Detected parity error */ -#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */ -#define PCI_STATUS_DEVSEL_FAST 0x000 -#define PCI_STATUS_DEVSEL_MEDIUM 0x200 -#define PCI_STATUS_DEVSEL_SLOW 0x400 -#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */ -#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */ -#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */ -#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */ -#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */ - -#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8 revision */ -#define PCI_REVISION_ID 0x08 /* Revision ID */ -#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */ -#define PCI_CLASS_DEVICE 0x0a /* Device class */ - -#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */ -#define PCI_LATENCY_TIMER 0x0d /* 8 bits */ -#define PCI_HEADER_TYPE 0x0e /* 8 bits */ -#define PCI_HEADER_TYPE_NORMAL 0 -#define PCI_HEADER_TYPE_BRIDGE 1 -#define PCI_HEADER_TYPE_CARDBUS 2 - -#define PCI_BIST 0x0f /* 8 bits */ -#define PCI_BIST_CODE_MASK 0x0f /* Return result */ -#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */ -#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */ - -/* - * Base addresses specify locations in memory or I/O space. - * Decoded size can be determined by writing a value of - * 0xffffffff to the register, and reading it back. Only - * 1 bits are decoded. - */ -#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */ -#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */ -#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */ -#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */ -#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */ -#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */ -#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */ -#define PCI_BASE_ADDRESS_SPACE_IO 0x01 -#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00 -#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06 -#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */ -#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */ -#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */ -#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */ -#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL) -#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL) -/* bit 1 is reserved if address_space = 1 */ - -/* Header type 0 (normal devices) */ -#define PCI_CARDBUS_CIS 0x28 -#define PCI_SUBSYSTEM_VENDOR_ID 0x2c -#define PCI_SUBSYSTEM_ID 0x2e -#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ -#define PCI_ROM_ADDRESS_ENABLE 0x01 -#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) - -#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ - -/* 0x35-0x3b are reserved */ -#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */ -#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */ -#define PCI_MIN_GNT 0x3e /* 8 bits */ -#define PCI_MAX_LAT 0x3f /* 8 bits */ - -/* Header type 1 (PCI-to-PCI bridges) */ -#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */ -#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ -#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ -#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ -#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ -#define PCI_IO_LIMIT 0x1d -#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ -#define PCI_IO_RANGE_TYPE_16 0x00 -#define PCI_IO_RANGE_TYPE_32 0x01 -#define PCI_IO_RANGE_MASK (~0x0fUL) -#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */ -#define PCI_MEMORY_BASE 0x20 /* Memory range behind */ -#define PCI_MEMORY_LIMIT 0x22 -#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL -#define PCI_MEMORY_RANGE_MASK (~0x0fUL) -#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */ -#define PCI_PREF_MEMORY_LIMIT 0x26 -#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL -#define PCI_PREF_RANGE_TYPE_32 0x00 -#define PCI_PREF_RANGE_TYPE_64 0x01 -#define PCI_PREF_RANGE_MASK (~0x0fUL) -#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */ -#define PCI_PREF_LIMIT_UPPER32 0x2c -#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */ -#define PCI_IO_LIMIT_UPPER16 0x32 -/* 0x34 same as for htype 0 */ -/* 0x35-0x3b is reserved */ -#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */ -/* 0x3c-0x3d are same as for htype 0 */ -#define PCI_BRIDGE_CONTROL 0x3e -#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */ -#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */ -#define PCI_BRIDGE_CTL_ISA 0x04 /* Enable ISA mode */ -#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */ -#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */ -#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */ -#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */ - -/* Header type 2 (CardBus bridges) */ -#define PCI_CB_CAPABILITY_LIST 0x14 -/* 0x15 reserved */ -#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */ -#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */ -#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */ -#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */ -#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */ -#define PCI_CB_MEMORY_BASE_0 0x1c -#define PCI_CB_MEMORY_LIMIT_0 0x20 -#define PCI_CB_MEMORY_BASE_1 0x24 -#define PCI_CB_MEMORY_LIMIT_1 0x28 -#define PCI_CB_IO_BASE_0 0x2c -#define PCI_CB_IO_BASE_0_HI 0x2e -#define PCI_CB_IO_LIMIT_0 0x30 -#define PCI_CB_IO_LIMIT_0_HI 0x32 -#define PCI_CB_IO_BASE_1 0x34 -#define PCI_CB_IO_BASE_1_HI 0x36 -#define PCI_CB_IO_LIMIT_1 0x38 -#define PCI_CB_IO_LIMIT_1_HI 0x3a -#define PCI_CB_IO_RANGE_MASK (~0x03UL) -/* 0x3c-0x3d are same as for htype 0 */ -#define PCI_CB_BRIDGE_CONTROL 0x3e -#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */ -#define PCI_CB_BRIDGE_CTL_SERR 0x02 -#define PCI_CB_BRIDGE_CTL_ISA 0x04 -#define PCI_CB_BRIDGE_CTL_VGA 0x08 -#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20 -#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */ -#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */ -#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */ -#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200 -#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400 -#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40 -#define PCI_CB_SUBSYSTEM_ID 0x42 -#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */ -/* 0x48-0x7f reserved */ - -/* Capability lists */ - -#define PCI_CAP_LIST_ID 0 /* Capability ID */ -#define PCI_CAP_ID_PM 0x01 /* Power Management */ -#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */ -#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */ -#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */ -#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ -#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ -#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ -#define PCI_CAP_ID_HT 0x08 /* HyperTransport */ -#define PCI_CAP_ID_VNDR 0x09 /* Vendor specific */ -#define PCI_CAP_ID_DBG 0x0A /* Debug port */ -#define PCI_CAP_ID_CCRC 0x0B /* CompactPCI Central Resource Control */ -#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ -#define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */ -#define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ -#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ -#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ -#define PCI_CAP_ID_SATA 0x12 /* Serial ATA */ -#define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ -#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ -#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ -#define PCI_CAP_SIZEOF 4 - -/* Power Management Registers */ - -#define PCI_PM_PMC 2 /* PM Capabilities Register */ -#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */ -#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */ -#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */ -#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */ -#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxiliary power support mask */ -#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */ -#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */ -#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */ -#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */ -#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */ -#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */ -#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ -#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ -#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ -#define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ -#define PCI_PM_CTRL 4 /* PM control and status register */ -#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ -#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */ -#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ -#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ -#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ -#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */ -#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */ -#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */ -#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */ -#define PCI_PM_DATA_REGISTER 7 /* (??) */ -#define PCI_PM_SIZEOF 8 - -/* AGP registers */ - -#define PCI_AGP_VERSION 2 /* BCD version number */ -#define PCI_AGP_RFU 3 /* Rest of capability flags */ -#define PCI_AGP_STATUS 4 /* Status register */ -#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */ -#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */ -#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */ -#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */ -#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */ -#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */ -#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */ -#define PCI_AGP_COMMAND 8 /* Control register */ -#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ -#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ -#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ -#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ -#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ -#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ -#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ -#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ -#define PCI_AGP_SIZEOF 12 - -/* Vital Product Data */ - -#define PCI_VPD_ADDR 2 /* Address to access (15 bits!) */ -#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */ -#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */ -#define PCI_VPD_DATA 4 /* 32-bits of data returned here */ - -/* Slot Identification */ - -#define PCI_SID_ESR 2 /* Expansion Slot Register */ -#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */ -#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ -#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ - -/* Message Signalled Interrupts registers */ - -#define PCI_MSI_FLAGS 2 /* Various flags */ -#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */ -#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */ -#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */ -#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */ -#define PCI_MSI_FLAGS_MASKBIT 0x100 /* 64-bit mask bits allowed */ -#define PCI_MSI_RFU 3 /* Rest of capability flags */ -#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ -#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ -#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ -#define PCI_MSI_MASK_32 12 /* Mask bits register for 32-bit devices */ -#define PCI_MSI_PENDING_32 16 /* Pending bits register for 32-bit devices */ -#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ -#define PCI_MSI_MASK_64 16 /* Mask bits register for 64-bit devices */ -#define PCI_MSI_PENDING_64 20 /* Pending bits register for 32-bit devices */ - -/* MSI-X registers */ -#define PCI_MSIX_FLAGS 2 -#define PCI_MSIX_FLAGS_QSIZE 0x7FF -#define PCI_MSIX_FLAGS_ENABLE (1 << 15) -#define PCI_MSIX_FLAGS_MASKALL (1 << 14) -#define PCI_MSIX_TABLE 4 -#define PCI_MSIX_PBA 8 -#define PCI_MSIX_FLAGS_BIRMASK (7 << 0) - -/* MSI-X entry's format */ -#define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR 0 -#define PCI_MSIX_ENTRY_UPPER_ADDR 4 -#define PCI_MSIX_ENTRY_DATA 8 -#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 - -/* CompactPCI Hotswap Register */ - -#define PCI_CHSWP_CSR 2 /* Control and Status Register */ -#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */ -#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */ -#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */ -#define PCI_CHSWP_LOO 0x08 /* LED On / Off */ -#define PCI_CHSWP_PI 0x30 /* Programming Interface */ -#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ -#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ - -/* PCI Advanced Feature registers */ - -#define PCI_AF_LENGTH 2 -#define PCI_AF_CAP 3 -#define PCI_AF_CAP_TP 0x01 -#define PCI_AF_CAP_FLR 0x02 -#define PCI_AF_CTRL 4 -#define PCI_AF_CTRL_FLR 0x01 -#define PCI_AF_STATUS 5 -#define PCI_AF_STATUS_TP 0x01 - -/* PCI-X registers */ - -#define PCI_X_CMD 2 /* Modes & Features */ -#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */ -#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */ -#define PCI_X_CMD_READ_512 0x0000 /* 512 byte maximum read byte count */ -#define PCI_X_CMD_READ_1K 0x0004 /* 1Kbyte maximum read byte count */ -#define PCI_X_CMD_READ_2K 0x0008 /* 2Kbyte maximum read byte count */ -#define PCI_X_CMD_READ_4K 0x000c /* 4Kbyte maximum read byte count */ -#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */ - /* Max # of outstanding split transactions */ -#define PCI_X_CMD_SPLIT_1 0x0000 /* Max 1 */ -#define PCI_X_CMD_SPLIT_2 0x0010 /* Max 2 */ -#define PCI_X_CMD_SPLIT_3 0x0020 /* Max 3 */ -#define PCI_X_CMD_SPLIT_4 0x0030 /* Max 4 */ -#define PCI_X_CMD_SPLIT_8 0x0040 /* Max 8 */ -#define PCI_X_CMD_SPLIT_12 0x0050 /* Max 12 */ -#define PCI_X_CMD_SPLIT_16 0x0060 /* Max 16 */ -#define PCI_X_CMD_SPLIT_32 0x0070 /* Max 32 */ -#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ -#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ -#define PCI_X_STATUS 4 /* PCI-X capabilities */ -#define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ -#define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ -#define PCI_X_STATUS_64BIT 0x00010000 /* 64-bit device */ -#define PCI_X_STATUS_133MHZ 0x00020000 /* 133 MHz capable */ -#define PCI_X_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */ -#define PCI_X_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */ -#define PCI_X_STATUS_COMPLEX 0x00100000 /* Device Complexity */ -#define PCI_X_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */ -#define PCI_X_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */ -#define PCI_X_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */ -#define PCI_X_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */ -#define PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */ -#define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */ - -/* PCI Bridge Subsystem ID registers */ - -#define PCI_SSVID_VENDOR_ID 4 /* PCI-Bridge subsystem vendor id register */ -#define PCI_SSVID_DEVICE_ID 6 /* PCI-Bridge subsystem device id register */ - -/* PCI Express capability registers */ - -#define PCI_EXP_FLAGS 2 /* Capabilities register */ -#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ -#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ -#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ -#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ -#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ -#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ -#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ -#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ -#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIE Bridge */ -#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ -#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ -#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ -#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ -#define PCI_EXP_DEVCAP 4 /* Device capabilities */ -#define PCI_EXP_DEVCAP_PAYLOAD 0x07 /* Max_Payload_Size */ -#define PCI_EXP_DEVCAP_PHANTOM 0x18 /* Phantom functions */ -#define PCI_EXP_DEVCAP_EXT_TAG 0x20 /* Extended tags */ -#define PCI_EXP_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */ -#define PCI_EXP_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */ -#define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */ -#define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */ -#define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */ -#define PCI_EXP_DEVCAP_RBER 0x8000 /* Role-Based Error Reporting */ -#define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ -#define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ -#define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */ -#define PCI_EXP_DEVCTL 8 /* Device Control */ -#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ -#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ -#define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ -#define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ -#define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ -#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ -#define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ -#define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ -#define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ -#define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */ -#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ -#define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */ -#define PCI_EXP_DEVSTA 10 /* Device Status */ -#define PCI_EXP_DEVSTA_CED 0x01 /* Correctable Error Detected */ -#define PCI_EXP_DEVSTA_NFED 0x02 /* Non-Fatal Error Detected */ -#define PCI_EXP_DEVSTA_FED 0x04 /* Fatal Error Detected */ -#define PCI_EXP_DEVSTA_URD 0x08 /* Unsupported Request Detected */ -#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ -#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ -#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ -#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ -#define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ -#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ -#define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ -#define PCI_EXP_LNKCAP_L1EL 0x00038000 /* L1 Exit Latency */ -#define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* L1 Clock Power Management */ -#define PCI_EXP_LNKCAP_SDERC 0x00080000 /* Surprise Down Error Reporting Capable */ -#define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ -#define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ -#define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ -#define PCI_EXP_LNKCTL 16 /* Link Control */ -#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ -#define PCI_EXP_LNKCTL_RCB 0x0008 /* Read Completion Boundary */ -#define PCI_EXP_LNKCTL_LD 0x0010 /* Link Disable */ -#define PCI_EXP_LNKCTL_RL 0x0020 /* Retrain Link */ -#define PCI_EXP_LNKCTL_CCC 0x0040 /* Common Clock Configuration */ -#define PCI_EXP_LNKCTL_ES 0x0080 /* Extended Synch */ -#define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */ -#define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ -#define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ -#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Lnk Autonomous Bandwidth Interrupt Enable */ -#define PCI_EXP_LNKSTA 18 /* Link Status */ -#define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ -#define PCI_EXP_LNKSTA_CLS_2_5GB 0x01 /* Current Link Speed 2.5GT/s */ -#define PCI_EXP_LNKSTA_CLS_5_0GB 0x02 /* Current Link Speed 5.0GT/s */ -#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Nogotiated Link Width */ -#define PCI_EXP_LNKSTA_NLW_SHIFT 4 /* start of NLW mask in link status */ -#define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ -#define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ -#define PCI_EXP_LNKSTA_DLLLA 0x2000 /* Data Link Layer Link Active */ -#define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ -#define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ -#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ -#define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ -#define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ -#define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ -#define PCI_EXP_SLTCAP_AIP 0x00000008 /* Attention Indicator Present */ -#define PCI_EXP_SLTCAP_PIP 0x00000010 /* Power Indicator Present */ -#define PCI_EXP_SLTCAP_HPS 0x00000020 /* Hot-Plug Surprise */ -#define PCI_EXP_SLTCAP_HPC 0x00000040 /* Hot-Plug Capable */ -#define PCI_EXP_SLTCAP_SPLV 0x00007f80 /* Slot Power Limit Value */ -#define PCI_EXP_SLTCAP_SPLS 0x00018000 /* Slot Power Limit Scale */ -#define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ -#define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ -#define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ -#define PCI_EXP_SLTCTL 24 /* Slot Control */ -#define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ -#define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ -#define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ -#define PCI_EXP_SLTCTL_PDCE 0x0008 /* Presence Detect Changed Enable */ -#define PCI_EXP_SLTCTL_CCIE 0x0010 /* Command Completed Interrupt Enable */ -#define PCI_EXP_SLTCTL_HPIE 0x0020 /* Hot-Plug Interrupt Enable */ -#define PCI_EXP_SLTCTL_AIC 0x00c0 /* Attention Indicator Control */ -#define PCI_EXP_SLTCTL_PIC 0x0300 /* Power Indicator Control */ -#define PCI_EXP_SLTCTL_PCC 0x0400 /* Power Controller Control */ -#define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ -#define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ -#define PCI_EXP_SLTSTA 26 /* Slot Status */ -#define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ -#define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ -#define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */ -#define PCI_EXP_SLTSTA_PDC 0x0008 /* Presence Detect Changed */ -#define PCI_EXP_SLTSTA_CC 0x0010 /* Command Completed */ -#define PCI_EXP_SLTSTA_MRLSS 0x0020 /* MRL Sensor State */ -#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ -#define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ -#define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ -#define PCI_EXP_RTCTL 28 /* Root Control */ -#define PCI_EXP_RTCTL_SECEE 0x01 /* System Error on Correctable Error */ -#define PCI_EXP_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */ -#define PCI_EXP_RTCTL_SEFEE 0x04 /* System Error on Fatal Error */ -#define PCI_EXP_RTCTL_PMEIE 0x08 /* PME Interrupt Enable */ -#define PCI_EXP_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */ -#define PCI_EXP_RTCAP 30 /* Root Capabilities */ -#define PCI_EXP_RTSTA 32 /* Root Status */ -#define PCI_EXP_RTSTA_PME 0x10000 /* PME status */ -#define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ -#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ -#define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ -#define PCI_EXP_DEVCAP2_LTR 0x800 /* Latency tolerance reporting */ -#define PCI_EXP_OBFF_MASK 0xc0000 /* OBFF support mechanism */ -#define PCI_EXP_OBFF_MSG 0x40000 /* New message signaling */ -#define PCI_EXP_OBFF_WAKE 0x80000 /* Re-use WAKE# for OBFF */ -#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ -#define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ -#define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ -#define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ -#define PCI_EXP_LTR_EN 0x400 /* Latency tolerance reporting */ -#define PCI_EXP_OBFF_MSGA_EN 0x2000 /* OBFF enable with Message type A */ -#define PCI_EXP_OBFF_MSGB_EN 0x4000 /* OBFF enable with Message type B */ -#define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ -#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ -#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ - -/* Extended Capabilities (PCI-X 2.0 and Express) */ -#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) -#define PCI_EXT_CAP_VER(header) ((header >> 16) & 0xf) -#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) - -#define PCI_EXT_CAP_ID_ERR 1 -#define PCI_EXT_CAP_ID_VC 2 -#define PCI_EXT_CAP_ID_DSN 3 -#define PCI_EXT_CAP_ID_PWR 4 -#define PCI_EXT_CAP_ID_VNDR 11 -#define PCI_EXT_CAP_ID_ACS 13 -#define PCI_EXT_CAP_ID_ARI 14 -#define PCI_EXT_CAP_ID_ATS 15 -#define PCI_EXT_CAP_ID_SRIOV 16 -#define PCI_EXT_CAP_ID_LTR 24 - -/* Advanced Error Reporting */ -#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ -#define PCI_ERR_UNC_TRAIN 0x00000001 /* Training */ -#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ -#define PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */ -#define PCI_ERR_UNC_FCP 0x00002000 /* Flow Control Protocol */ -#define PCI_ERR_UNC_COMP_TIME 0x00004000 /* Completion Timeout */ -#define PCI_ERR_UNC_COMP_ABORT 0x00008000 /* Completer Abort */ -#define PCI_ERR_UNC_UNX_COMP 0x00010000 /* Unexpected Completion */ -#define PCI_ERR_UNC_RX_OVER 0x00020000 /* Receiver Overflow */ -#define PCI_ERR_UNC_MALF_TLP 0x00040000 /* Malformed TLP */ -#define PCI_ERR_UNC_ECRC 0x00080000 /* ECRC Error Status */ -#define PCI_ERR_UNC_UNSUP 0x00100000 /* Unsupported Request */ -#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ - /* Same bits as above */ -#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ - /* Same bits as above */ -#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ -#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ -#define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */ -#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ -#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */ -#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */ -#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ - /* Same bits as above */ -#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ -#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ -#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ -#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ -#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ -#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ -#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ -#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ -/* Correctable Err Reporting Enable */ -#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 -/* Non-fatal Err Reporting Enable */ -#define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 -/* Fatal Err Reporting Enable */ -#define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 -#define PCI_ERR_ROOT_STATUS 48 -#define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ -/* Multi ERR_COR Received */ -#define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 -/* ERR_FATAL/NONFATAL Recevied */ -#define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 -/* Multi ERR_FATAL/NONFATAL Recevied */ -#define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 -#define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First Fatal */ -#define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ -#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ -#define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ - -/* Virtual Channel */ -#define PCI_VC_PORT_REG1 4 -#define PCI_VC_PORT_REG2 8 -#define PCI_VC_PORT_CTRL 12 -#define PCI_VC_PORT_STATUS 14 -#define PCI_VC_RES_CAP 16 -#define PCI_VC_RES_CTRL 20 -#define PCI_VC_RES_STATUS 26 - -/* Power Budgeting */ -#define PCI_PWR_DSR 4 /* Data Select Register */ -#define PCI_PWR_DATA 8 /* Data Register */ -#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ -#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ -#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ -#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ -#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ -#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ -#define PCI_PWR_CAP 12 /* Capability */ -#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ - -/* - * Hypertransport sub capability types - * - * Unfortunately there are both 3 bit and 5 bit capability types defined - * in the HT spec, catering for that is a little messy. You probably don't - * want to use these directly, just use pci_find_ht_capability() and it - * will do the right thing for you. - */ -#define HT_3BIT_CAP_MASK 0xE0 -#define HT_CAPTYPE_SLAVE 0x00 /* Slave/Primary link configuration */ -#define HT_CAPTYPE_HOST 0x20 /* Host/Secondary link configuration */ - -#define HT_5BIT_CAP_MASK 0xF8 -#define HT_CAPTYPE_IRQ 0x80 /* IRQ Configuration */ -#define HT_CAPTYPE_REMAPPING_40 0xA0 /* 40 bit address remapping */ -#define HT_CAPTYPE_REMAPPING_64 0xA2 /* 64 bit address remapping */ -#define HT_CAPTYPE_UNITID_CLUMP 0x90 /* Unit ID clumping */ -#define HT_CAPTYPE_EXTCONF 0x98 /* Extended Configuration Space Access */ -#define HT_CAPTYPE_MSI_MAPPING 0xA8 /* MSI Mapping Capability */ -#define HT_MSI_FLAGS 0x02 /* Offset to flags */ -#define HT_MSI_FLAGS_ENABLE 0x1 /* Mapping enable */ -#define HT_MSI_FLAGS_FIXED 0x2 /* Fixed mapping only */ -#define HT_MSI_FIXED_ADDR 0x00000000FEE00000ULL /* Fixed addr */ -#define HT_MSI_ADDR_LO 0x04 /* Offset to low addr bits */ -#define HT_MSI_ADDR_LO_MASK 0xFFF00000 /* Low address bit mask */ -#define HT_MSI_ADDR_HI 0x08 /* Offset to high addr bits */ -#define HT_CAPTYPE_DIRECT_ROUTE 0xB0 /* Direct routing configuration */ -#define HT_CAPTYPE_VCSET 0xB8 /* Virtual Channel configuration */ -#define HT_CAPTYPE_ERROR_RETRY 0xC0 /* Retry on error configuration */ -#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 hypertransport configuration */ -#define HT_CAPTYPE_PM 0xE0 /* Hypertransport powermanagement configuration */ - -/* Alternative Routing-ID Interpretation */ -#define PCI_ARI_CAP 0x04 /* ARI Capability Register */ -#define PCI_ARI_CAP_MFVC 0x0001 /* MFVC Function Groups Capability */ -#define PCI_ARI_CAP_ACS 0x0002 /* ACS Function Groups Capability */ -#define PCI_ARI_CAP_NFN(x) (((x) >> 8) & 0xff) /* Next Function Number */ -#define PCI_ARI_CTRL 0x06 /* ARI Control Register */ -#define PCI_ARI_CTRL_MFVC 0x0001 /* MFVC Function Groups Enable */ -#define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ -#define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ - -/* Address Translation Service */ -#define PCI_ATS_CAP 0x04 /* ATS Capability Register */ -#define PCI_ATS_CAP_QDEP(x) ((x) & 0x1f) /* Invalidate Queue Depth */ -#define PCI_ATS_MAX_QDEP 32 /* Max Invalidate Queue Depth */ -#define PCI_ATS_CTRL 0x06 /* ATS Control Register */ -#define PCI_ATS_CTRL_ENABLE 0x8000 /* ATS Enable */ -#define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */ -#define PCI_ATS_MIN_STU 12 /* shift of minimum STU block */ - -/* Single Root I/O Virtualization */ -#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ -#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ -#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ -#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ -#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ -#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ -#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ -#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ -#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ -#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ -#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ -#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ -#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ -#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ -#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ -#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ -#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */ -#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ -#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ -#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ -#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ -#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ -#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ -#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ -#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ -#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ -#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ -#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ -#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ - -#define PCI_LTR_MAX_SNOOP_LAT 0x4 -#define PCI_LTR_MAX_NOSNOOP_LAT 0x6 -#define PCI_LTR_VALUE_MASK 0x000003ff -#define PCI_LTR_SCALE_MASK 0x00001c00 -#define PCI_LTR_SCALE_SHIFT 10 - -/* Access Control Service */ -#define PCI_ACS_CAP 0x04 /* ACS Capability Register */ -#define PCI_ACS_SV 0x01 /* Source Validation */ -#define PCI_ACS_TB 0x02 /* Translation Blocking */ -#define PCI_ACS_RR 0x04 /* P2P Request Redirect */ -#define PCI_ACS_CR 0x08 /* P2P Completion Redirect */ -#define PCI_ACS_UF 0x10 /* Upstream Forwarding */ -#define PCI_ACS_EC 0x20 /* P2P Egress Control */ -#define PCI_ACS_DT 0x40 /* Direct Translated P2P */ -#define PCI_ACS_CTRL 0x06 /* ACS Control Register */ -#define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ - -#endif /* LINUX_PCI_REGS_H */ +#include "standard-headers/linux/pci_regs.h" diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 7b4b1bb3d7..91a61abbc4 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -2,6 +2,7 @@ #define __HW_SPAPR_H__ #include "sysemu/dma.h" +#include "hw/boards.h" #include "hw/ppc/xics.h" #include "hw/ppc/spapr_drc.h" @@ -12,15 +13,42 @@ typedef struct sPAPRConfigureConnectorState sPAPRConfigureConnectorState; typedef struct sPAPREventLogEntry sPAPREventLogEntry; #define HPTE64_V_HPTE_DIRTY 0x0000000000000040ULL +#define SPAPR_ENTRY_POINT 0x100 + +typedef struct sPAPRMachineClass sPAPRMachineClass; +typedef struct sPAPRMachineState sPAPRMachineState; + +#define TYPE_SPAPR_MACHINE "spapr-machine" +#define SPAPR_MACHINE(obj) \ + OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE) +#define SPAPR_MACHINE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(sPAPRMachineClass, obj, TYPE_SPAPR_MACHINE) +#define SPAPR_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(sPAPRMachineClass, klass, TYPE_SPAPR_MACHINE) + +/** + * sPAPRMachineClass: + */ +struct sPAPRMachineClass { + /*< private >*/ + MachineClass parent_class; + + /*< public >*/ +}; + +/** + * sPAPRMachineState: + */ +struct sPAPRMachineState { + /*< private >*/ + MachineState parent_obj; -typedef struct sPAPREnvironment { struct VIOsPAPRBus *vio_bus; QLIST_HEAD(, sPAPRPHBState) phbs; struct sPAPRNVRAM *nvram; XICSState *icp; DeviceState *rtc; - hwaddr ram_limit; void *htab; uint32_t htab_shift; hwaddr rma_size; @@ -29,7 +57,6 @@ typedef struct sPAPREnvironment { ssize_t rtas_size; void *rtas_blob; void *fdt_skel; - target_ulong entry_point; uint64_t rtc_offset; /* Now used only during incoming migration */ struct PPCTimebase tb; bool has_graphics; @@ -46,7 +73,10 @@ typedef struct sPAPREnvironment { /* RTAS state */ QTAILQ_HEAD(, sPAPRConfigureConnectorState) ccs_list; -} sPAPREnvironment; + + /*< public >*/ + char *kvm_type; +}; #define H_SUCCESS 0 #define H_BUSY 1 /* Hardware busy -- retry later */ @@ -319,8 +349,6 @@ typedef struct sPAPREnvironment { #define KVMPPC_H_CAS (KVMPPC_HCALL_BASE + 0x2) #define KVMPPC_HCALL_MAX KVMPPC_H_CAS -extern sPAPREnvironment *spapr; - typedef struct sPAPRDeviceTreeUpdateHeader { uint32_t version_id; } sPAPRDeviceTreeUpdateHeader; @@ -335,7 +363,7 @@ typedef struct sPAPRDeviceTreeUpdateHeader { do { } while (0) #endif -typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr, +typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, sPAPRMachineState *sm, target_ulong opcode, target_ulong *args); @@ -490,12 +518,12 @@ static inline void rtas_st_buffer(target_ulong phys, target_ulong phys_len, rtas_st_buffer_direct(phys + 2, phys_len - 2, buffer, buffer_len); } -typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr, +typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPRMachineState *sm, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn); -target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr, +target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *sm, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, @@ -546,9 +574,10 @@ struct sPAPREventLogEntry { QTAILQ_ENTRY(sPAPREventLogEntry) next; }; -void spapr_events_init(sPAPREnvironment *spapr); +void spapr_events_init(sPAPRMachineState *sm); void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq); -int spapr_h_cas_compose_response(target_ulong addr, target_ulong size); +int spapr_h_cas_compose_response(sPAPRMachineState *sm, + target_ulong addr, target_ulong size); sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, uint64_t bus_offset, uint32_t page_shift, @@ -578,4 +607,6 @@ void spapr_ccs_reset_hook(void *opaque); void spapr_rtc_read(DeviceState *dev, struct tm *tm, uint32_t *ns); int spapr_rtc_import_offset(DeviceState *dev, int64_t legacy_offset); +#define SPAPR_MEMORY_BLOCK_SIZE (1 << 28) /* 256MB */ + #endif /* !defined (__HW_SPAPR_H__) */ diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h index f95016a92e..2299a5405a 100644 --- a/include/hw/ppc/spapr_vio.h +++ b/include/hw/ppc/spapr_vio.h @@ -88,6 +88,8 @@ extern int spapr_vio_signal(VIOsPAPRDevice *dev, target_ulong mode); static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + return xics_get_qirq(spapr->icp, dev->irq); } @@ -126,7 +128,7 @@ static inline int spapr_vio_dma_set(VIOsPAPRDevice *dev, uint64_t taddr, int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq); -VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg); +VIOsPAPRDevice *vty_lookup(sPAPRMachineState *spapr, target_ulong reg); void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len); void spapr_vty_create(VIOsPAPRBus *bus, CharDriverState *chardev); void spapr_vlan_create(VIOsPAPRBus *bus, NICInfo *nd); diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index a214dd7f28..355a96623c 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -109,6 +109,7 @@ struct ICPState { uint8_t pending_priority; uint8_t mfrr; qemu_irq output; + bool cap_irq_xics_enabled; }; #define TYPE_ICS "ics" diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h index 34f93c39bf..cc1dba49bf 100644 --- a/include/hw/sysbus.h +++ b/include/hw/sysbus.h @@ -58,6 +58,7 @@ typedef struct SysBusDeviceClass { * omitted then. (This is not considered a fatal error.) */ char *(*explicit_ofw_unit_address)(const SysBusDevice *dev); + void (*connect_irq_notifier)(SysBusDevice *dev, qemu_irq irq); } SysBusDeviceClass; struct SysBusDevice { diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h index 26b2ad6f4e..c5cf1d79f3 100644 --- a/include/hw/vfio/vfio-platform.h +++ b/include/hw/vfio/vfio-platform.h @@ -41,6 +41,7 @@ typedef struct VFIOINTp { int state; /* inactive, pending, active */ uint8_t pin; /* index */ uint32_t flags; /* IRQ info flags */ + bool kvm_accel; /* set when QEMU bypass through KVM enabled */ } VFIOINTp; /* function type for user side eventfd handler */ @@ -57,6 +58,7 @@ typedef struct VFIOPlatformDevice { uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */ QEMUTimer *mmap_timer; /* allows fast-path resume after IRQ hit */ QemuMutex intp_mutex; /* protect the intp_list IRQ state */ + bool irqfd_allowed; /* debug option to force irqfd on/off */ } VFIOPlatformDevice; typedef struct VFIOPlatformDeviceClass { diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index b8c9244b21..889676147a 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -112,9 +112,6 @@ extern const GraphicHwOps virtio_gpu_ops; VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false), \ DEFINE_PROP_UINT32("vectors", _state, nvectors, 3) -#define DEFINE_VIRTIO_GPU_PROPERTIES(_state, _conf_field) \ - DEFINE_PROP_UINT32("max_outputs", _state, _conf_field.max_outputs, 1) - #define VIRTIO_GPU_FILL_CMD(out) do { \ size_t s; \ s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \ diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 280dacfbe9..60b11d5c2c 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -21,9 +21,6 @@ #define VIRTIO_NET(obj) \ OBJECT_CHECK(VirtIONet, (obj), TYPE_VIRTIO_NET) -#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Control channel offload - * configuration support */ - #define TX_TIMER_INTERVAL 150000 /* 150 us */ /* Limit the number of packets that can be sent via a single flush @@ -100,15 +97,6 @@ typedef struct VirtIONet { int announce_counter; } VirtIONet; -/* - * Control network offloads - * - * Dynamic offloads are available with the - * VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit. - */ -#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 -#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 - void virtio_net_set_netclient_name(VirtIONet *n, const char *name, const char *type); diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h index 38f29fb098..ed5fd3e1a2 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_common.h @@ -195,7 +195,7 @@ static inline int xen_get_vmport_regs_pfn(XenXC xc, domid_t dom, #define IOREQ_TYPE_PCI_CONFIG 2 -typedef uint32_t ioservid_t; +typedef uint16_t ioservid_t; static inline void xen_map_memory_section(XenXC xc, domid_t dom, ioservid_t ioservid, diff --git a/include/migration/migration.h b/include/migration/migration.h index 9387c8c9d4..b2711ef305 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -34,6 +34,7 @@ #define QEMU_VM_SECTION_FULL 0x04 #define QEMU_VM_SUBSECTION 0x05 #define QEMU_VM_VMDESCRIPTION 0x06 +#define QEMU_VM_CONFIGURATION 0x07 #define QEMU_VM_SECTION_FOOTER 0x7e struct MigrationParams { @@ -176,10 +177,11 @@ bool migrate_use_compression(void); int migrate_compress_level(void); int migrate_compress_threads(void); int migrate_decompress_threads(void); +bool migrate_use_events(void); void ram_control_before_iterate(QEMUFile *f, uint64_t flags); void ram_control_after_iterate(QEMUFile *f, uint64_t flags); -void ram_control_load_hook(QEMUFile *f, uint64_t flags); +void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data); /* Whenever this is found in the data stream, the flags * will be passed to ram_control_load_hook in the incoming-migration @@ -197,4 +199,7 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, void ram_mig_init(void); void savevm_skip_section_footers(void); +void register_global_state(void); +void global_state_set_optional(void); +void savevm_skip_configuration(void); #endif diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 4f67d79227..ea49f33fac 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -63,16 +63,20 @@ typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov, /* * This function provides hooks around different * stages of RAM migration. + * 'opaque' is the backend specific data in QEMUFile + * 'data' is call specific data associated with the 'flags' value */ -typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags); +typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags, + void *data); /* * Constants used by ram_control_* hooks */ -#define RAM_CONTROL_SETUP 0 -#define RAM_CONTROL_ROUND 1 -#define RAM_CONTROL_HOOK 2 -#define RAM_CONTROL_FINISH 3 +#define RAM_CONTROL_SETUP 0 +#define RAM_CONTROL_ROUND 1 +#define RAM_CONTROL_HOOK 2 +#define RAM_CONTROL_FINISH 3 +#define RAM_CONTROL_BLOCK_REG 4 /* * This function allows override of where the RAM page diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 0695d7c3de..f51ff693e9 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -820,6 +820,8 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, QJSON *vmdesc); +bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); + int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, const VMStateDescription *vmsd, void *base, int alias_id, diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 39f0f19fb0..42f42f5a2d 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -323,6 +323,8 @@ extern struct CPUTailQ cpus; #define CPU_FOREACH(cpu) QTAILQ_FOREACH(cpu, &cpus, node) #define CPU_FOREACH_SAFE(cpu, next_cpu) \ QTAILQ_FOREACH_SAFE(cpu, &cpus, node, next_cpu) +#define CPU_FOREACH_REVERSE(cpu) \ + QTAILQ_FOREACH_REVERSE(cpu, &cpus, CPUTailQ, node) #define first_cpu QTAILQ_FIRST(&cpus) DECLARE_TLS(CPUState *, current_cpu); diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h new file mode 100644 index 0000000000..57e8c80c30 --- /dev/null +++ b/include/standard-headers/linux/pci_regs.h @@ -0,0 +1,719 @@ +/* + * pci_regs.h + * + * PCI standard defines + * Copyright 1994, Drew Eckhardt + * Copyright 1997--1999 Martin Mares <mj@ucw.cz> + * + * For more information, please consult the following manuals (look at + * http://www.pcisig.com/ for how to get them): + * + * PCI BIOS Specification + * PCI Local Bus Specification + * PCI to PCI Bridge Specification + * PCI System Design Guide + * + * For hypertransport information, please consult the following manuals + * from http://www.hypertransport.org + * + * The Hypertransport I/O Link Specification + */ + +#ifndef LINUX_PCI_REGS_H +#define LINUX_PCI_REGS_H + +/* + * Under PCI, each device has 256 bytes of configuration address space, + * of which the first 64 bytes are standardized as follows: + */ +#define PCI_VENDOR_ID 0x00 /* 16 bits */ +#define PCI_DEVICE_ID 0x02 /* 16 bits */ +#define PCI_COMMAND 0x04 /* 16 bits */ +#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */ +#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */ +#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */ +#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */ +#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ +#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ +#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ +#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ +#define PCI_COMMAND_SERR 0x100 /* Enable SERR */ +#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ +#define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */ + +#define PCI_STATUS 0x06 /* 16 bits */ +#define PCI_STATUS_INTERRUPT 0x08 /* Interrupt status */ +#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ +#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ +#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ +#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ +#define PCI_STATUS_PARITY 0x100 /* Detected parity error */ +#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */ +#define PCI_STATUS_DEVSEL_FAST 0x000 +#define PCI_STATUS_DEVSEL_MEDIUM 0x200 +#define PCI_STATUS_DEVSEL_SLOW 0x400 +#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */ +#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */ +#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */ +#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */ +#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */ + +#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8 revision */ +#define PCI_REVISION_ID 0x08 /* Revision ID */ +#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */ +#define PCI_CLASS_DEVICE 0x0a /* Device class */ + +#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */ +#define PCI_LATENCY_TIMER 0x0d /* 8 bits */ +#define PCI_HEADER_TYPE 0x0e /* 8 bits */ +#define PCI_HEADER_TYPE_NORMAL 0 +#define PCI_HEADER_TYPE_BRIDGE 1 +#define PCI_HEADER_TYPE_CARDBUS 2 + +#define PCI_BIST 0x0f /* 8 bits */ +#define PCI_BIST_CODE_MASK 0x0f /* Return result */ +#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */ +#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */ + +/* + * Base addresses specify locations in memory or I/O space. + * Decoded size can be determined by writing a value of + * 0xffffffff to the register, and reading it back. Only + * 1 bits are decoded. + */ +#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */ +#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */ +#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */ +#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */ +#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */ +#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */ +#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */ +#define PCI_BASE_ADDRESS_SPACE_IO 0x01 +#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00 +#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06 +#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */ +#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */ +#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */ +#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */ +#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL) +#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL) +/* bit 1 is reserved if address_space = 1 */ + +/* Header type 0 (normal devices) */ +#define PCI_CARDBUS_CIS 0x28 +#define PCI_SUBSYSTEM_VENDOR_ID 0x2c +#define PCI_SUBSYSTEM_ID 0x2e +#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ +#define PCI_ROM_ADDRESS_ENABLE 0x01 +#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) + +#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ + +/* 0x35-0x3b are reserved */ +#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */ +#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */ +#define PCI_MIN_GNT 0x3e /* 8 bits */ +#define PCI_MAX_LAT 0x3f /* 8 bits */ + +/* Header type 1 (PCI-to-PCI bridges) */ +#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */ +#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ +#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ +#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ +#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ +#define PCI_IO_LIMIT 0x1d +#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ +#define PCI_IO_RANGE_TYPE_16 0x00 +#define PCI_IO_RANGE_TYPE_32 0x01 +#define PCI_IO_RANGE_MASK (~0x0fUL) +#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */ +#define PCI_MEMORY_BASE 0x20 /* Memory range behind */ +#define PCI_MEMORY_LIMIT 0x22 +#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL +#define PCI_MEMORY_RANGE_MASK (~0x0fUL) +#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */ +#define PCI_PREF_MEMORY_LIMIT 0x26 +#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL +#define PCI_PREF_RANGE_TYPE_32 0x00 +#define PCI_PREF_RANGE_TYPE_64 0x01 +#define PCI_PREF_RANGE_MASK (~0x0fUL) +#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */ +#define PCI_PREF_LIMIT_UPPER32 0x2c +#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */ +#define PCI_IO_LIMIT_UPPER16 0x32 +/* 0x34 same as for htype 0 */ +/* 0x35-0x3b is reserved */ +#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */ +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_BRIDGE_CONTROL 0x3e +#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */ +#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */ +#define PCI_BRIDGE_CTL_ISA 0x04 /* Enable ISA mode */ +#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */ +#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */ +#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */ +#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */ + +/* Header type 2 (CardBus bridges) */ +#define PCI_CB_CAPABILITY_LIST 0x14 +/* 0x15 reserved */ +#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */ +#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */ +#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */ +#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */ +#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */ +#define PCI_CB_MEMORY_BASE_0 0x1c +#define PCI_CB_MEMORY_LIMIT_0 0x20 +#define PCI_CB_MEMORY_BASE_1 0x24 +#define PCI_CB_MEMORY_LIMIT_1 0x28 +#define PCI_CB_IO_BASE_0 0x2c +#define PCI_CB_IO_BASE_0_HI 0x2e +#define PCI_CB_IO_LIMIT_0 0x30 +#define PCI_CB_IO_LIMIT_0_HI 0x32 +#define PCI_CB_IO_BASE_1 0x34 +#define PCI_CB_IO_BASE_1_HI 0x36 +#define PCI_CB_IO_LIMIT_1 0x38 +#define PCI_CB_IO_LIMIT_1_HI 0x3a +#define PCI_CB_IO_RANGE_MASK (~0x03UL) +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_CB_BRIDGE_CONTROL 0x3e +#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */ +#define PCI_CB_BRIDGE_CTL_SERR 0x02 +#define PCI_CB_BRIDGE_CTL_ISA 0x04 +#define PCI_CB_BRIDGE_CTL_VGA 0x08 +#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20 +#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */ +#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200 +#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400 +#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40 +#define PCI_CB_SUBSYSTEM_ID 0x42 +#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */ +/* 0x48-0x7f reserved */ + +/* Capability lists */ + +#define PCI_CAP_LIST_ID 0 /* Capability ID */ +#define PCI_CAP_ID_PM 0x01 /* Power Management */ +#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */ +#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */ +#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */ +#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ +#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ +#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ +#define PCI_CAP_ID_HT 0x08 /* HyperTransport */ +#define PCI_CAP_ID_VNDR 0x09 /* Vendor specific */ +#define PCI_CAP_ID_DBG 0x0A /* Debug port */ +#define PCI_CAP_ID_CCRC 0x0B /* CompactPCI Central Resource Control */ +#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ +#define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */ +#define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ +#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ +#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ +#define PCI_CAP_ID_SATA 0x12 /* Serial ATA */ +#define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ +#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ +#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ +#define PCI_CAP_SIZEOF 4 + +/* Power Management Registers */ + +#define PCI_PM_PMC 2 /* PM Capabilities Register */ +#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */ +#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */ +#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */ +#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */ +#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxiliary power support mask */ +#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */ +#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */ +#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */ +#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */ +#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */ +#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */ +#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ +#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ +#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ +#define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ +#define PCI_PM_CTRL 4 /* PM control and status register */ +#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */ +#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ +#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ +#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ +#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */ +#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */ +#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */ +#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */ +#define PCI_PM_DATA_REGISTER 7 /* (??) */ +#define PCI_PM_SIZEOF 8 + +/* AGP registers */ + +#define PCI_AGP_VERSION 2 /* BCD version number */ +#define PCI_AGP_RFU 3 /* Rest of capability flags */ +#define PCI_AGP_STATUS 4 /* Status register */ +#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */ +#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */ +#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */ +#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */ +#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */ +#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */ +#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */ +#define PCI_AGP_COMMAND 8 /* Control register */ +#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ +#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ +#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ +#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ +#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ +#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ +#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ +#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ +#define PCI_AGP_SIZEOF 12 + +/* Vital Product Data */ + +#define PCI_VPD_ADDR 2 /* Address to access (15 bits!) */ +#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */ +#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */ +#define PCI_VPD_DATA 4 /* 32-bits of data returned here */ + +/* Slot Identification */ + +#define PCI_SID_ESR 2 /* Expansion Slot Register */ +#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */ +#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ +#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ + +/* Message Signalled Interrupts registers */ + +#define PCI_MSI_FLAGS 2 /* Various flags */ +#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */ +#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */ +#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */ +#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */ +#define PCI_MSI_FLAGS_MASKBIT 0x100 /* 64-bit mask bits allowed */ +#define PCI_MSI_RFU 3 /* Rest of capability flags */ +#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ +#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ +#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ +#define PCI_MSI_MASK_32 12 /* Mask bits register for 32-bit devices */ +#define PCI_MSI_PENDING_32 16 /* Pending bits register for 32-bit devices */ +#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ +#define PCI_MSI_MASK_64 16 /* Mask bits register for 64-bit devices */ +#define PCI_MSI_PENDING_64 20 /* Pending bits register for 32-bit devices */ + +/* MSI-X registers */ +#define PCI_MSIX_FLAGS 2 +#define PCI_MSIX_FLAGS_QSIZE 0x7FF +#define PCI_MSIX_FLAGS_ENABLE (1 << 15) +#define PCI_MSIX_FLAGS_MASKALL (1 << 14) +#define PCI_MSIX_TABLE 4 +#define PCI_MSIX_PBA 8 +#define PCI_MSIX_FLAGS_BIRMASK (7 << 0) + +/* MSI-X entry's format */ +#define PCI_MSIX_ENTRY_SIZE 16 +#define PCI_MSIX_ENTRY_LOWER_ADDR 0 +#define PCI_MSIX_ENTRY_UPPER_ADDR 4 +#define PCI_MSIX_ENTRY_DATA 8 +#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 + +/* CompactPCI Hotswap Register */ + +#define PCI_CHSWP_CSR 2 /* Control and Status Register */ +#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */ +#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */ +#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */ +#define PCI_CHSWP_LOO 0x08 /* LED On / Off */ +#define PCI_CHSWP_PI 0x30 /* Programming Interface */ +#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ +#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ + +/* PCI Advanced Feature registers */ + +#define PCI_AF_LENGTH 2 +#define PCI_AF_CAP 3 +#define PCI_AF_CAP_TP 0x01 +#define PCI_AF_CAP_FLR 0x02 +#define PCI_AF_CTRL 4 +#define PCI_AF_CTRL_FLR 0x01 +#define PCI_AF_STATUS 5 +#define PCI_AF_STATUS_TP 0x01 + +/* PCI-X registers */ + +#define PCI_X_CMD 2 /* Modes & Features */ +#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */ +#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */ +#define PCI_X_CMD_READ_512 0x0000 /* 512 byte maximum read byte count */ +#define PCI_X_CMD_READ_1K 0x0004 /* 1Kbyte maximum read byte count */ +#define PCI_X_CMD_READ_2K 0x0008 /* 2Kbyte maximum read byte count */ +#define PCI_X_CMD_READ_4K 0x000c /* 4Kbyte maximum read byte count */ +#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */ + /* Max # of outstanding split transactions */ +#define PCI_X_CMD_SPLIT_1 0x0000 /* Max 1 */ +#define PCI_X_CMD_SPLIT_2 0x0010 /* Max 2 */ +#define PCI_X_CMD_SPLIT_3 0x0020 /* Max 3 */ +#define PCI_X_CMD_SPLIT_4 0x0030 /* Max 4 */ +#define PCI_X_CMD_SPLIT_8 0x0040 /* Max 8 */ +#define PCI_X_CMD_SPLIT_12 0x0050 /* Max 12 */ +#define PCI_X_CMD_SPLIT_16 0x0060 /* Max 16 */ +#define PCI_X_CMD_SPLIT_32 0x0070 /* Max 32 */ +#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ +#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ +#define PCI_X_STATUS 4 /* PCI-X capabilities */ +#define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ +#define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ +#define PCI_X_STATUS_64BIT 0x00010000 /* 64-bit device */ +#define PCI_X_STATUS_133MHZ 0x00020000 /* 133 MHz capable */ +#define PCI_X_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */ +#define PCI_X_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */ +#define PCI_X_STATUS_COMPLEX 0x00100000 /* Device Complexity */ +#define PCI_X_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */ +#define PCI_X_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */ +#define PCI_X_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */ +#define PCI_X_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */ +#define PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */ +#define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */ + +/* PCI Bridge Subsystem ID registers */ + +#define PCI_SSVID_VENDOR_ID 4 /* PCI-Bridge subsystem vendor id register */ +#define PCI_SSVID_DEVICE_ID 6 /* PCI-Bridge subsystem device id register */ + +/* PCI Express capability registers */ + +#define PCI_EXP_FLAGS 2 /* Capabilities register */ +#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ +#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ +#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ +#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ +#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ +#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ +#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ +#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ +#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIE Bridge */ +#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ +#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ +#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ +#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ +#define PCI_EXP_DEVCAP 4 /* Device capabilities */ +#define PCI_EXP_DEVCAP_PAYLOAD 0x07 /* Max_Payload_Size */ +#define PCI_EXP_DEVCAP_PHANTOM 0x18 /* Phantom functions */ +#define PCI_EXP_DEVCAP_EXT_TAG 0x20 /* Extended tags */ +#define PCI_EXP_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */ +#define PCI_EXP_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */ +#define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */ +#define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */ +#define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */ +#define PCI_EXP_DEVCAP_RBER 0x8000 /* Role-Based Error Reporting */ +#define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ +#define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ +#define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */ +#define PCI_EXP_DEVCTL 8 /* Device Control */ +#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ +#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ +#define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ +#define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ +#define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ +#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ +#define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ +#define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ +#define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */ +#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ +#define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */ +#define PCI_EXP_DEVSTA 10 /* Device Status */ +#define PCI_EXP_DEVSTA_CED 0x01 /* Correctable Error Detected */ +#define PCI_EXP_DEVSTA_NFED 0x02 /* Non-Fatal Error Detected */ +#define PCI_EXP_DEVSTA_FED 0x04 /* Fatal Error Detected */ +#define PCI_EXP_DEVSTA_URD 0x08 /* Unsupported Request Detected */ +#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ +#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ +#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ +#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ +#define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ +#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ +#define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ +#define PCI_EXP_LNKCAP_L1EL 0x00038000 /* L1 Exit Latency */ +#define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* L1 Clock Power Management */ +#define PCI_EXP_LNKCAP_SDERC 0x00080000 /* Surprise Down Error Reporting Capable */ +#define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ +#define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ +#define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ +#define PCI_EXP_LNKCTL 16 /* Link Control */ +#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ +#define PCI_EXP_LNKCTL_RCB 0x0008 /* Read Completion Boundary */ +#define PCI_EXP_LNKCTL_LD 0x0010 /* Link Disable */ +#define PCI_EXP_LNKCTL_RL 0x0020 /* Retrain Link */ +#define PCI_EXP_LNKCTL_CCC 0x0040 /* Common Clock Configuration */ +#define PCI_EXP_LNKCTL_ES 0x0080 /* Extended Synch */ +#define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */ +#define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ +#define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ +#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Lnk Autonomous Bandwidth Interrupt Enable */ +#define PCI_EXP_LNKSTA 18 /* Link Status */ +#define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ +#define PCI_EXP_LNKSTA_CLS_2_5GB 0x01 /* Current Link Speed 2.5GT/s */ +#define PCI_EXP_LNKSTA_CLS_5_0GB 0x02 /* Current Link Speed 5.0GT/s */ +#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Nogotiated Link Width */ +#define PCI_EXP_LNKSTA_NLW_SHIFT 4 /* start of NLW mask in link status */ +#define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ +#define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ +#define PCI_EXP_LNKSTA_DLLLA 0x2000 /* Data Link Layer Link Active */ +#define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ +#define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ +#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ +#define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ +#define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ +#define PCI_EXP_SLTCAP_AIP 0x00000008 /* Attention Indicator Present */ +#define PCI_EXP_SLTCAP_PIP 0x00000010 /* Power Indicator Present */ +#define PCI_EXP_SLTCAP_HPS 0x00000020 /* Hot-Plug Surprise */ +#define PCI_EXP_SLTCAP_HPC 0x00000040 /* Hot-Plug Capable */ +#define PCI_EXP_SLTCAP_SPLV 0x00007f80 /* Slot Power Limit Value */ +#define PCI_EXP_SLTCAP_SPLS 0x00018000 /* Slot Power Limit Scale */ +#define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ +#define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ +#define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ +#define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ +#define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ +#define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ +#define PCI_EXP_SLTCTL_PDCE 0x0008 /* Presence Detect Changed Enable */ +#define PCI_EXP_SLTCTL_CCIE 0x0010 /* Command Completed Interrupt Enable */ +#define PCI_EXP_SLTCTL_HPIE 0x0020 /* Hot-Plug Interrupt Enable */ +#define PCI_EXP_SLTCTL_AIC 0x00c0 /* Attention Indicator Control */ +#define PCI_EXP_SLTCTL_PIC 0x0300 /* Power Indicator Control */ +#define PCI_EXP_SLTCTL_PCC 0x0400 /* Power Controller Control */ +#define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ +#define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ +#define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ +#define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ +#define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */ +#define PCI_EXP_SLTSTA_PDC 0x0008 /* Presence Detect Changed */ +#define PCI_EXP_SLTSTA_CC 0x0010 /* Command Completed */ +#define PCI_EXP_SLTSTA_MRLSS 0x0020 /* MRL Sensor State */ +#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ +#define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ +#define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ +#define PCI_EXP_RTCTL 28 /* Root Control */ +#define PCI_EXP_RTCTL_SECEE 0x01 /* System Error on Correctable Error */ +#define PCI_EXP_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */ +#define PCI_EXP_RTCTL_SEFEE 0x04 /* System Error on Fatal Error */ +#define PCI_EXP_RTCTL_PMEIE 0x08 /* PME Interrupt Enable */ +#define PCI_EXP_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */ +#define PCI_EXP_RTCAP 30 /* Root Capabilities */ +#define PCI_EXP_RTSTA 32 /* Root Status */ +#define PCI_EXP_RTSTA_PME 0x10000 /* PME status */ +#define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ +#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ +#define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCAP2_LTR 0x800 /* Latency tolerance reporting */ +#define PCI_EXP_OBFF_MASK 0xc0000 /* OBFF support mechanism */ +#define PCI_EXP_OBFF_MSG 0x40000 /* New message signaling */ +#define PCI_EXP_OBFF_WAKE 0x80000 /* Re-use WAKE# for OBFF */ +#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ +#define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ +#define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ +#define PCI_EXP_LTR_EN 0x400 /* Latency tolerance reporting */ +#define PCI_EXP_OBFF_MSGA_EN 0x2000 /* OBFF enable with Message type A */ +#define PCI_EXP_OBFF_MSGB_EN 0x4000 /* OBFF enable with Message type B */ +#define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ +#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ +#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ + +/* Extended Capabilities (PCI-X 2.0 and Express) */ +#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) +#define PCI_EXT_CAP_VER(header) ((header >> 16) & 0xf) +#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) + +#define PCI_EXT_CAP_ID_ERR 1 +#define PCI_EXT_CAP_ID_VC 2 +#define PCI_EXT_CAP_ID_DSN 3 +#define PCI_EXT_CAP_ID_PWR 4 +#define PCI_EXT_CAP_ID_VNDR 11 +#define PCI_EXT_CAP_ID_ACS 13 +#define PCI_EXT_CAP_ID_ARI 14 +#define PCI_EXT_CAP_ID_ATS 15 +#define PCI_EXT_CAP_ID_SRIOV 16 +#define PCI_EXT_CAP_ID_LTR 24 + +/* Advanced Error Reporting */ +#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ +#define PCI_ERR_UNC_TRAIN 0x00000001 /* Training */ +#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ +#define PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */ +#define PCI_ERR_UNC_FCP 0x00002000 /* Flow Control Protocol */ +#define PCI_ERR_UNC_COMP_TIME 0x00004000 /* Completion Timeout */ +#define PCI_ERR_UNC_COMP_ABORT 0x00008000 /* Completer Abort */ +#define PCI_ERR_UNC_UNX_COMP 0x00010000 /* Unexpected Completion */ +#define PCI_ERR_UNC_RX_OVER 0x00020000 /* Receiver Overflow */ +#define PCI_ERR_UNC_MALF_TLP 0x00040000 /* Malformed TLP */ +#define PCI_ERR_UNC_ECRC 0x00080000 /* ECRC Error Status */ +#define PCI_ERR_UNC_UNSUP 0x00100000 /* Unsupported Request */ +#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ + /* Same bits as above */ +#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ +#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ +#define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */ +#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ +#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */ +#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */ +#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ +#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ +#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ +#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ +#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ +#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ +#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ +#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ +/* Correctable Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 +/* Non-fatal Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 +/* Fatal Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 +#define PCI_ERR_ROOT_STATUS 48 +#define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ +/* Multi ERR_COR Received */ +#define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 +/* ERR_FATAL/NONFATAL Recevied */ +#define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 +/* Multi ERR_FATAL/NONFATAL Recevied */ +#define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 +#define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First Fatal */ +#define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ +#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ +#define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ + +/* Virtual Channel */ +#define PCI_VC_PORT_REG1 4 +#define PCI_VC_PORT_REG2 8 +#define PCI_VC_PORT_CTRL 12 +#define PCI_VC_PORT_STATUS 14 +#define PCI_VC_RES_CAP 16 +#define PCI_VC_RES_CTRL 20 +#define PCI_VC_RES_STATUS 26 + +/* Power Budgeting */ +#define PCI_PWR_DSR 4 /* Data Select Register */ +#define PCI_PWR_DATA 8 /* Data Register */ +#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ +#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ +#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ +#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ +#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ +#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ +#define PCI_PWR_CAP 12 /* Capability */ +#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ + +/* + * Hypertransport sub capability types + * + * Unfortunately there are both 3 bit and 5 bit capability types defined + * in the HT spec, catering for that is a little messy. You probably don't + * want to use these directly, just use pci_find_ht_capability() and it + * will do the right thing for you. + */ +#define HT_3BIT_CAP_MASK 0xE0 +#define HT_CAPTYPE_SLAVE 0x00 /* Slave/Primary link configuration */ +#define HT_CAPTYPE_HOST 0x20 /* Host/Secondary link configuration */ + +#define HT_5BIT_CAP_MASK 0xF8 +#define HT_CAPTYPE_IRQ 0x80 /* IRQ Configuration */ +#define HT_CAPTYPE_REMAPPING_40 0xA0 /* 40 bit address remapping */ +#define HT_CAPTYPE_REMAPPING_64 0xA2 /* 64 bit address remapping */ +#define HT_CAPTYPE_UNITID_CLUMP 0x90 /* Unit ID clumping */ +#define HT_CAPTYPE_EXTCONF 0x98 /* Extended Configuration Space Access */ +#define HT_CAPTYPE_MSI_MAPPING 0xA8 /* MSI Mapping Capability */ +#define HT_MSI_FLAGS 0x02 /* Offset to flags */ +#define HT_MSI_FLAGS_ENABLE 0x1 /* Mapping enable */ +#define HT_MSI_FLAGS_FIXED 0x2 /* Fixed mapping only */ +#define HT_MSI_FIXED_ADDR 0x00000000FEE00000ULL /* Fixed addr */ +#define HT_MSI_ADDR_LO 0x04 /* Offset to low addr bits */ +#define HT_MSI_ADDR_LO_MASK 0xFFF00000 /* Low address bit mask */ +#define HT_MSI_ADDR_HI 0x08 /* Offset to high addr bits */ +#define HT_CAPTYPE_DIRECT_ROUTE 0xB0 /* Direct routing configuration */ +#define HT_CAPTYPE_VCSET 0xB8 /* Virtual Channel configuration */ +#define HT_CAPTYPE_ERROR_RETRY 0xC0 /* Retry on error configuration */ +#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 hypertransport configuration */ +#define HT_CAPTYPE_PM 0xE0 /* Hypertransport powermanagement configuration */ + +/* Alternative Routing-ID Interpretation */ +#define PCI_ARI_CAP 0x04 /* ARI Capability Register */ +#define PCI_ARI_CAP_MFVC 0x0001 /* MFVC Function Groups Capability */ +#define PCI_ARI_CAP_ACS 0x0002 /* ACS Function Groups Capability */ +#define PCI_ARI_CAP_NFN(x) (((x) >> 8) & 0xff) /* Next Function Number */ +#define PCI_ARI_CTRL 0x06 /* ARI Control Register */ +#define PCI_ARI_CTRL_MFVC 0x0001 /* MFVC Function Groups Enable */ +#define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ +#define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ + +/* Address Translation Service */ +#define PCI_ATS_CAP 0x04 /* ATS Capability Register */ +#define PCI_ATS_CAP_QDEP(x) ((x) & 0x1f) /* Invalidate Queue Depth */ +#define PCI_ATS_MAX_QDEP 32 /* Max Invalidate Queue Depth */ +#define PCI_ATS_CTRL 0x06 /* ATS Control Register */ +#define PCI_ATS_CTRL_ENABLE 0x8000 /* ATS Enable */ +#define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */ +#define PCI_ATS_MIN_STU 12 /* shift of minimum STU block */ + +/* Single Root I/O Virtualization */ +#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ +#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ +#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ +#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ +#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ +#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ +#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ +#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ +#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ +#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ +#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ +#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ +#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ +#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ +#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ +#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ +#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */ +#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ +#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ +#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ +#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ +#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ +#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ +#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ +#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ +#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ +#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ +#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ +#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ + +#define PCI_LTR_MAX_SNOOP_LAT 0x4 +#define PCI_LTR_MAX_NOSNOOP_LAT 0x6 +#define PCI_LTR_VALUE_MASK 0x000003ff +#define PCI_LTR_SCALE_MASK 0x00001c00 +#define PCI_LTR_SCALE_SHIFT 10 + +/* Access Control Service */ +#define PCI_ACS_CAP 0x04 /* ACS Capability Register */ +#define PCI_ACS_SV 0x01 /* Source Validation */ +#define PCI_ACS_TB 0x02 /* Translation Blocking */ +#define PCI_ACS_RR 0x04 /* P2P Request Redirect */ +#define PCI_ACS_CR 0x08 /* P2P Completion Redirect */ +#define PCI_ACS_UF 0x10 /* Upstream Forwarding */ +#define PCI_ACS_EC 0x20 /* P2P Egress Control */ +#define PCI_ACS_DT 0x40 /* Direct Translated P2P */ +#define PCI_ACS_CTRL 0x06 /* ACS Control Register */ +#define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ + +#endif /* LINUX_PCI_REGS_H */ diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h index 3209c90219..a78f33e775 100644 --- a/include/standard-headers/linux/virtio_net.h +++ b/include/standard-headers/linux/virtio_net.h @@ -34,6 +34,7 @@ /* The feature bitmap for virtio net */ #define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ +#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */ #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ @@ -226,4 +227,19 @@ struct virtio_net_ctrl_mq { #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 +/* + * Control network offloads + * + * Reconfigures the network offloads that Guest can handle. + * + * Available with the VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit. + * + * Command data format matches the feature bit mask exactly. + * + * See VIRTIO_NET_F_GUEST_* for the list of offloads + * that can be enabled/disabled. + */ +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 + #endif /* _LINUX_VIRTIO_NET_H */ diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h index ecdc133d59..9262acd130 100644 --- a/include/standard-headers/linux/virtio_pci.h +++ b/include/standard-headers/linux/virtio_pci.h @@ -157,6 +157,12 @@ struct virtio_pci_common_cfg { uint32_t queue_used_hi; /* read-write */ }; +/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ +struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; + uint8_t pci_cfg_data[4]; /* Data for BAR access. */ +}; + /* Macro versions of offsets for the Old Timers! */ #define VIRTIO_PCI_CAP_VNDR 0 #define VIRTIO_PCI_CAP_NEXT 1 diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index f459fbdbd4..983e99e1e7 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -19,6 +19,7 @@ #include "qemu/queue.h" #include "qom/cpu.h" #include "exec/memattrs.h" +#include "hw/irq.h" #ifdef CONFIG_KVM #include <linux/kvm.h> @@ -151,6 +152,7 @@ extern bool kvm_readonly_mem_allowed; #define kvm_halt_in_kernel() (false) #define kvm_eventfds_enabled() (false) #define kvm_irqfds_enabled() (false) +#define kvm_resamplefds_enabled() (false) #define kvm_msi_via_irqfd_enabled() (false) #define kvm_gsi_routing_allowed() (false) #define kvm_gsi_direct_mapping() (false) @@ -416,9 +418,15 @@ void kvm_irqchip_release_virq(KVMState *s, int virq); int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter); +int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + EventNotifier *rn, int virq); +int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + int virq); int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, - EventNotifier *rn, int virq); -int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq); + EventNotifier *rn, qemu_irq irq); +int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, + qemu_irq irq); +void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi); void kvm_pc_gsi_handler(void *opaque, int n, int level); void kvm_pc_setup_irq_routing(bool pci_enabled); void kvm_init_irq_routing(KVMState *s); diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index df809518b4..44570d17e6 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -28,6 +28,7 @@ bool runstate_check(RunState state); void runstate_set(RunState new_state); int runstate_is_running(void); bool runstate_needs_reset(void); +bool runstate_store(char *str, size_t size); typedef struct vm_change_state_entry VMChangeStateEntry; typedef void VMChangeStateHandler(void *opaque, int running, RunState state); @@ -35,6 +35,7 @@ #include "exec/address-spaces.h" #include "qemu/event_notifier.h" #include "trace.h" +#include "hw/irq.h" #include "hw/boards.h" @@ -84,6 +85,7 @@ struct KVMState * unsigned, and treating them as signed here can break things */ unsigned irq_set_ioctl; unsigned int sigmask_len; + GHashTable *gsimap; #ifdef KVM_CAP_IRQ_ROUTING struct kvm_irq_routing *irq_routes; int nr_allocated_irq_routes; @@ -1332,19 +1334,49 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg) } #endif /* !KVM_CAP_IRQ_ROUTING */ -int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, - EventNotifier *rn, int virq) +int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + EventNotifier *rn, int virq) { return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), rn ? event_notifier_get_fd(rn) : -1, virq, true); } -int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq) +int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + int virq) { return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), -1, virq, false); } +int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, + EventNotifier *rn, qemu_irq irq) +{ + gpointer key, gsi; + gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi); + + if (!found) { + return -ENXIO; + } + return kvm_irqchip_add_irqfd_notifier_gsi(s, n, rn, GPOINTER_TO_INT(gsi)); +} + +int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, + qemu_irq irq) +{ + gpointer key, gsi; + gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi); + + if (!found) { + return -ENXIO; + } + return kvm_irqchip_remove_irqfd_notifier_gsi(s, n, GPOINTER_TO_INT(gsi)); +} + +void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi) +{ + g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi)); +} + static void kvm_irqchip_create(MachineState *machine, KVMState *s) { int ret; @@ -1380,6 +1412,8 @@ static void kvm_irqchip_create(MachineState *machine, KVMState *s) kvm_halt_in_kernel_allowed = true; kvm_init_irq_routing(s); + + s->gsimap = g_hash_table_new(g_direct_hash, g_direct_equal); } /* Find number of supported CPUs using the recommended diff --git a/kvm-stub.c b/kvm-stub.c index 7ba90c546f..d9ad624eee 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -24,6 +24,7 @@ bool kvm_kernel_irqchip; bool kvm_async_interrupts_allowed; bool kvm_eventfds_allowed; bool kvm_irqfds_allowed; +bool kvm_resamplefds_allowed; bool kvm_msi_via_irqfd_allowed; bool kvm_gsi_routing_allowed; bool kvm_gsi_direct_mapping; @@ -137,13 +138,14 @@ int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter) return -ENOSYS; } -int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, - EventNotifier *rn, int virq) +int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + EventNotifier *rn, int virq) { return -ENOSYS; } -int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq) +int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + int virq) { return -ENOSYS; } diff --git a/linux-user/main.c b/linux-user/main.c index c855bccadc..6c5c2effbf 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -1424,8 +1424,7 @@ void cpu_loop (CPUSPARCState *env) #ifdef TARGET_PPC static inline uint64_t cpu_ppc_get_tb(CPUPPCState *env) { - /* TO FIX */ - return 0; + return cpu_get_real_ticks(); } uint64_t cpu_ppc_load_tbl(CPUPPCState *env) diff --git a/migration/block.c b/migration/block.c index ddb59ccf87..ed865ed23b 100644 --- a/migration/block.c +++ b/migration/block.c @@ -457,7 +457,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, blk_mig_lock(); if (bmds_aio_inflight(bmds, sector)) { blk_mig_unlock(); - bdrv_drain_all(); + bdrv_drain(bmds->bs); } else { blk_mig_unlock(); } diff --git a/migration/migration.c b/migration/migration.c index c6ac08a0cb..45719a0f5f 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -26,6 +26,8 @@ #include "qemu/thread.h" #include "qmp-commands.h" #include "trace.h" +#include "qapi/util.h" +#include "qapi-event.h" #define MAX_THROTTLE (32 << 20) /* Migration speed throttling */ @@ -97,6 +99,120 @@ void migration_incoming_state_destroy(void) mis_current = NULL; } + +typedef struct { + bool optional; + uint32_t size; + uint8_t runstate[100]; +} GlobalState; + +static GlobalState global_state; + +static int global_state_store(void) +{ + if (!runstate_store((char *)global_state.runstate, + sizeof(global_state.runstate))) { + error_report("runstate name too big: %s", global_state.runstate); + trace_migrate_state_too_big(); + return -EINVAL; + } + return 0; +} + +static char *global_state_get_runstate(void) +{ + return (char *)global_state.runstate; +} + +void global_state_set_optional(void) +{ + global_state.optional = true; +} + +static bool global_state_needed(void *opaque) +{ + GlobalState *s = opaque; + char *runstate = (char *)s->runstate; + + /* If it is not optional, it is mandatory */ + + if (s->optional == false) { + return true; + } + + /* If state is running or paused, it is not needed */ + + if (strcmp(runstate, "running") == 0 || + strcmp(runstate, "paused") == 0) { + return false; + } + + /* for any other state it is needed */ + return true; +} + +static int global_state_post_load(void *opaque, int version_id) +{ + GlobalState *s = opaque; + int ret = 0; + char *runstate = (char *)s->runstate; + + trace_migrate_global_state_post_load(runstate); + + if (strcmp(runstate, "running") != 0) { + Error *local_err = NULL; + int r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX, + -1, &local_err); + + if (r == -1) { + if (local_err) { + error_report_err(local_err); + } + return -EINVAL; + } + ret = vm_stop_force_state(r); + } + + return ret; +} + +static void global_state_pre_save(void *opaque) +{ + GlobalState *s = opaque; + + trace_migrate_global_state_pre_save((char *)s->runstate); + s->size = strlen((char *)s->runstate) + 1; +} + +static const VMStateDescription vmstate_globalstate = { + .name = "globalstate", + .version_id = 1, + .minimum_version_id = 1, + .post_load = global_state_post_load, + .pre_save = global_state_pre_save, + .needed = global_state_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(size, GlobalState), + VMSTATE_BUFFER(runstate, GlobalState), + VMSTATE_END_OF_LIST() + }, +}; + +void register_global_state(void) +{ + /* We would use it independently that we receive it */ + strcpy((char *)&global_state.runstate, ""); + vmstate_register(NULL, 0, &vmstate_globalstate, &global_state); +} + +static void migrate_generate_event(int new_state) +{ + if (migrate_use_events()) { + qapi_event_send_migration(new_state, &error_abort); + trace_migrate_set_state(new_state); + } +} + /* * Called on -incoming with a defer: uri. * The migration can be started later after any parameters have been @@ -114,6 +230,7 @@ void qemu_start_incoming_migration(const char *uri, Error **errp) { const char *p; + qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort); if (!strcmp(uri, "defer")) { deferred_incoming_migration(errp); } else if (strstart(uri, "tcp:", &p)) { @@ -142,7 +259,7 @@ static void process_incoming_migration_co(void *opaque) int ret; migration_incoming_state_new(f); - + migrate_generate_event(MIGRATION_STATUS_ACTIVE); ret = qemu_loadvm_state(f); qemu_fclose(f); @@ -150,10 +267,12 @@ static void process_incoming_migration_co(void *opaque) migration_incoming_state_destroy(); if (ret < 0) { + migrate_generate_event(MIGRATION_STATUS_FAILED); error_report("load of migration failed: %s", strerror(-ret)); migrate_decompress_threads_join(); exit(EXIT_FAILURE); } + migrate_generate_event(MIGRATION_STATUS_COMPLETED); qemu_announce_self(); /* Make sure all file formats flush their mutable metadata */ @@ -164,10 +283,20 @@ static void process_incoming_migration_co(void *opaque) exit(EXIT_FAILURE); } - if (autostart) { + /* runstate == "" means that we haven't received it through the + * wire, so we obey autostart. runstate == runing means that we + * need to run it, we need to make sure that we do it after + * everything else has finished. Every other state change is done + * at the post_load function */ + + if (strcmp(global_state_get_runstate(), "running") == 0) { vm_start(); - } else { - runstate_set(RUN_STATE_PAUSED); + } else if (strcmp(global_state_get_runstate(), "") == 0) { + if (autostart) { + vm_start(); + } else { + runstate_set(RUN_STATE_PAUSED); + } } migrate_decompress_threads_join(); } @@ -392,8 +521,8 @@ void qmp_migrate_set_parameters(bool has_compress_level, static void migrate_set_state(MigrationState *s, int old_state, int new_state) { - if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) { - trace_migrate_set_state(new_state); + if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) { + migrate_generate_event(new_state); } } @@ -432,8 +561,7 @@ void migrate_fd_error(MigrationState *s) { trace_migrate_fd_error(); assert(s->file == NULL); - s->state = MIGRATION_STATUS_FAILED; - trace_migrate_set_state(MIGRATION_STATUS_FAILED); + migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); notifier_list_notify(&migration_state_notifiers, s); } @@ -517,8 +645,7 @@ static MigrationState *migrate_init(const MigrationParams *params) s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = decompress_thread_count; s->bandwidth_limit = bandwidth_limit; - s->state = MIGRATION_STATUS_SETUP; - trace_migrate_set_state(MIGRATION_STATUS_SETUP); + migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); return s; @@ -577,7 +704,6 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, error_setg(errp, QERR_MIGRATION_ACTIVE); return; } - if (runstate_check(RUN_STATE_INMIGRATE)) { error_setg(errp, "Guest is waiting for an incoming migration"); return; @@ -592,6 +718,12 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, return; } + /* We are starting a new migration, so we want to start in a clean + state. This change is only needed if previous migration + failed/was cancelled. We don't use migrate_set_state() because + we are setting the initial state, not changing it. */ + s->state = MIGRATION_STATUS_NONE; + s = migrate_init(¶ms); if (strstart(uri, "tcp:", &p)) { @@ -611,7 +743,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, } else { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri", "a valid migration protocol"); - s->state = MIGRATION_STATUS_FAILED; + migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); return; } @@ -740,6 +872,15 @@ int migrate_decompress_threads(void) return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; } +bool migrate_use_events(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; +} + int migrate_use_xbzrle(void) { MigrationState *s; @@ -793,10 +934,13 @@ static void *migration_thread(void *opaque) qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); old_vm_running = runstate_is_running(); - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - if (ret >= 0) { - qemu_file_set_rate_limit(s->file, INT64_MAX); - qemu_savevm_state_complete(s->file); + ret = global_state_store(); + if (!ret) { + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + if (ret >= 0) { + qemu_file_set_rate_limit(s->file, INT64_MAX); + qemu_savevm_state_complete(s->file); + } } qemu_mutex_unlock_iothread(); diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 557c1c1a62..6bb3dc15cd 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -129,7 +129,7 @@ void ram_control_before_iterate(QEMUFile *f, uint64_t flags) int ret = 0; if (f->ops->before_ram_iterate) { - ret = f->ops->before_ram_iterate(f, f->opaque, flags); + ret = f->ops->before_ram_iterate(f, f->opaque, flags, NULL); if (ret < 0) { qemu_file_set_error(f, ret); } @@ -141,24 +141,30 @@ void ram_control_after_iterate(QEMUFile *f, uint64_t flags) int ret = 0; if (f->ops->after_ram_iterate) { - ret = f->ops->after_ram_iterate(f, f->opaque, flags); + ret = f->ops->after_ram_iterate(f, f->opaque, flags, NULL); if (ret < 0) { qemu_file_set_error(f, ret); } } } -void ram_control_load_hook(QEMUFile *f, uint64_t flags) +void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data) { int ret = -EINVAL; if (f->ops->hook_ram_load) { - ret = f->ops->hook_ram_load(f, f->opaque, flags); + ret = f->ops->hook_ram_load(f, f->opaque, flags, data); if (ret < 0) { qemu_file_set_error(f, ret); } } else { - qemu_file_set_error(f, ret); + /* + * Hook is a hook specifically requested by the source sending a flag + * that expects there to be a hook on the destination. + */ + if (flags == RAM_CONTROL_HOOK) { + qemu_file_set_error(f, ret); + } } } diff --git a/migration/ram.c b/migration/ram.c index 57368e1575..c696814196 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -222,6 +222,7 @@ static RAMBlock *last_seen_block; static RAMBlock *last_sent_block; static ram_addr_t last_offset; static unsigned long *migration_bitmap; +static QemuMutex migration_bitmap_mutex; static uint64_t migration_dirty_pages; static uint32_t last_version; static bool ram_bulk_stage; @@ -494,6 +495,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, return 1; } +/* Called with rcu_read_lock() to protect migration_bitmap */ static inline ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, ram_addr_t start) @@ -502,26 +504,31 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, unsigned long nr = base + (start >> TARGET_PAGE_BITS); uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr)); unsigned long size = base + (mr_size >> TARGET_PAGE_BITS); + unsigned long *bitmap; unsigned long next; + bitmap = atomic_rcu_read(&migration_bitmap); if (ram_bulk_stage && nr > base) { next = nr + 1; } else { - next = find_next_bit(migration_bitmap, size, nr); + next = find_next_bit(bitmap, size, nr); } if (next < size) { - clear_bit(next, migration_bitmap); + clear_bit(next, bitmap); migration_dirty_pages--; } return (next - base) << TARGET_PAGE_BITS; } +/* Called with rcu_read_lock() to protect migration_bitmap */ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) { + unsigned long *bitmap; + bitmap = atomic_rcu_read(&migration_bitmap); migration_dirty_pages += - cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length); + cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length); } @@ -563,11 +570,13 @@ static void migration_bitmap_sync(void) trace_migration_bitmap_sync_start(); address_space_sync_dirty_bitmap(&address_space_memory); + qemu_mutex_lock(&migration_bitmap_mutex); rcu_read_lock(); QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { migration_bitmap_sync_range(block->mr->ram_addr, block->used_length); } rcu_read_unlock(); + qemu_mutex_unlock(&migration_bitmap_mutex); trace_migration_bitmap_sync_end(migration_dirty_pages - num_dirty_pages_init); @@ -1017,10 +1026,15 @@ void free_xbzrle_decoded_buf(void) static void migration_end(void) { - if (migration_bitmap) { + /* caller have hold iothread lock or is in a bh, so there is + * no writing race against this migration_bitmap + */ + unsigned long *bitmap = migration_bitmap; + atomic_rcu_set(&migration_bitmap, NULL); + if (bitmap) { memory_global_dirty_log_stop(); - g_free(migration_bitmap); - migration_bitmap = NULL; + synchronize_rcu(); + g_free(bitmap); } XBZRLE_cache_lock(); @@ -1051,6 +1065,30 @@ static void reset_ram_globals(void) #define MAX_WAIT 50 /* ms, half buffered_file limit */ +void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) +{ + /* called in qemu main thread, so there is + * no writing race against this migration_bitmap + */ + if (migration_bitmap) { + unsigned long *old_bitmap = migration_bitmap, *bitmap; + bitmap = bitmap_new(new); + + /* prevent migration_bitmap content from being set bit + * by migration_bitmap_sync_range() at the same time. + * it is safe to migration if migration_bitmap is cleared bit + * at the same time. + */ + qemu_mutex_lock(&migration_bitmap_mutex); + bitmap_copy(bitmap, old_bitmap, old); + bitmap_set(bitmap, old, new - old); + atomic_rcu_set(&migration_bitmap, bitmap); + qemu_mutex_unlock(&migration_bitmap_mutex); + migration_dirty_pages += new - old; + synchronize_rcu(); + g_free(old_bitmap); + } +} /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has * long-running RCU critical section. When rcu-reclaims in the code @@ -1067,6 +1105,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) dirty_rate_high_cnt = 0; bitmap_sync_count = 0; migration_bitmap_sync_init(); + qemu_mutex_init(&migration_bitmap_mutex); if (migrate_use_xbzrle()) { XBZRLE_cache_lock(); @@ -1477,6 +1516,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) error_report_err(local_err); } } + ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, + block->idstr); break; } } @@ -1545,7 +1586,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) break; default: if (flags & RAM_SAVE_FLAG_HOOK) { - ram_control_load_hook(f, flags); + ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); } else { error_report("Unknown combination of migration flags: %#x", flags); diff --git a/migration/rdma.c b/migration/rdma.c index b777273b59..f106b2a818 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -215,17 +215,19 @@ static void network_to_caps(RDMACapabilities *cap) * the information. It's small anyway, so a list is overkill. */ typedef struct RDMALocalBlock { - uint8_t *local_host_addr; /* local virtual address */ - uint64_t remote_host_addr; /* remote virtual address */ - uint64_t offset; - uint64_t length; - struct ibv_mr **pmr; /* MRs for chunk-level registration */ - struct ibv_mr *mr; /* MR for non-chunk-level registration */ - uint32_t *remote_keys; /* rkeys for chunk-level registration */ - uint32_t remote_rkey; /* rkeys for non-chunk-level registration */ - int index; /* which block are we */ - bool is_ram_block; - int nb_chunks; + char *block_name; + uint8_t *local_host_addr; /* local virtual address */ + uint64_t remote_host_addr; /* remote virtual address */ + uint64_t offset; + uint64_t length; + struct ibv_mr **pmr; /* MRs for chunk-level registration */ + struct ibv_mr *mr; /* MR for non-chunk-level registration */ + uint32_t *remote_keys; /* rkeys for chunk-level registration */ + uint32_t remote_rkey; /* rkeys for non-chunk-level registration */ + int index; /* which block are we */ + unsigned int src_index; /* (Only used on dest) */ + bool is_ram_block; + int nb_chunks; unsigned long *transit_bitmap; unsigned long *unregister_bitmap; } RDMALocalBlock; @@ -353,6 +355,9 @@ typedef struct RDMAContext { RDMALocalBlocks local_ram_blocks; RDMADestBlock *dest_blocks; + /* Index of the next RAMBlock received during block registration */ + unsigned int next_src_index; + /* * Migration on *destination* started. * Then use coroutine yield function. @@ -411,7 +416,7 @@ static void network_to_control(RDMAControlHeader *control) */ typedef struct QEMU_PACKED { union QEMU_PACKED { - uint64_t current_addr; /* offset into the ramblock of the chunk */ + uint64_t current_addr; /* offset into the ram_addr_t space */ uint64_t chunk; /* chunk to lookup if unregistering */ } key; uint32_t current_index; /* which ramblock the chunk belongs to */ @@ -419,8 +424,19 @@ typedef struct QEMU_PACKED { uint64_t chunks; /* how many sequential chunks to register */ } RDMARegister; -static void register_to_network(RDMARegister *reg) +static void register_to_network(RDMAContext *rdma, RDMARegister *reg) { + RDMALocalBlock *local_block; + local_block = &rdma->local_ram_blocks.block[reg->current_index]; + + if (local_block->is_ram_block) { + /* + * current_addr as passed in is an address in the local ram_addr_t + * space, we need to translate this for the destination + */ + reg->key.current_addr -= local_block->offset; + reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset; + } reg->key.current_addr = htonll(reg->key.current_addr); reg->current_index = htonl(reg->current_index); reg->chunks = htonll(reg->chunks); @@ -436,13 +452,19 @@ static void network_to_register(RDMARegister *reg) typedef struct QEMU_PACKED { uint32_t value; /* if zero, we will madvise() */ uint32_t block_idx; /* which ram block index */ - uint64_t offset; /* where in the remote ramblock this chunk */ + uint64_t offset; /* Address in remote ram_addr_t space */ uint64_t length; /* length of the chunk */ } RDMACompress; -static void compress_to_network(RDMACompress *comp) +static void compress_to_network(RDMAContext *rdma, RDMACompress *comp) { comp->value = htonl(comp->value); + /* + * comp->offset as passed in is an address in the local ram_addr_t + * space, we need to translate this for the destination + */ + comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; + comp->offset += rdma->dest_blocks[comp->block_idx].offset; comp->block_idx = htonl(comp->block_idx); comp->offset = htonll(comp->offset); comp->length = htonll(comp->length); @@ -511,27 +533,27 @@ static inline uint8_t *ram_chunk_end(const RDMALocalBlock *rdma_ram_block, return result; } -static int rdma_add_block(RDMAContext *rdma, void *host_addr, +static int rdma_add_block(RDMAContext *rdma, const char *block_name, + void *host_addr, ram_addr_t block_offset, uint64_t length) { RDMALocalBlocks *local = &rdma->local_ram_blocks; - RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap, - (void *)(uintptr_t)block_offset); + RDMALocalBlock *block; RDMALocalBlock *old = local->block; - assert(block == NULL); - local->block = g_malloc0(sizeof(RDMALocalBlock) * (local->nb_blocks + 1)); if (local->nb_blocks) { int x; - for (x = 0; x < local->nb_blocks; x++) { - g_hash_table_remove(rdma->blockmap, - (void *)(uintptr_t)old[x].offset); - g_hash_table_insert(rdma->blockmap, - (void *)(uintptr_t)old[x].offset, - &local->block[x]); + if (rdma->blockmap) { + for (x = 0; x < local->nb_blocks; x++) { + g_hash_table_remove(rdma->blockmap, + (void *)(uintptr_t)old[x].offset); + g_hash_table_insert(rdma->blockmap, + (void *)(uintptr_t)old[x].offset, + &local->block[x]); + } } memcpy(local->block, old, sizeof(RDMALocalBlock) * local->nb_blocks); g_free(old); @@ -539,10 +561,12 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr, block = &local->block[local->nb_blocks]; + block->block_name = g_strdup(block_name); block->local_host_addr = host_addr; block->offset = block_offset; block->length = length; block->index = local->nb_blocks; + block->src_index = ~0U; /* Filled in by the receipt of the block list */ block->nb_chunks = ram_chunk_index(host_addr, host_addr + length) + 1UL; block->transit_bitmap = bitmap_new(block->nb_chunks); bitmap_clear(block->transit_bitmap, 0, block->nb_chunks); @@ -552,9 +576,12 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr, block->is_ram_block = local->init ? false : true; - g_hash_table_insert(rdma->blockmap, (void *) block_offset, block); + if (rdma->blockmap) { + g_hash_table_insert(rdma->blockmap, (void *) block_offset, block); + } - trace_rdma_add_block(local->nb_blocks, (uintptr_t) block->local_host_addr, + trace_rdma_add_block(block_name, local->nb_blocks, + (uintptr_t) block->local_host_addr, block->offset, block->length, (uintptr_t) (block->local_host_addr + block->length), BITS_TO_LONGS(block->nb_chunks) * @@ -574,7 +601,7 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr, static int qemu_rdma_init_one_block(const char *block_name, void *host_addr, ram_addr_t block_offset, ram_addr_t length, void *opaque) { - return rdma_add_block(opaque, host_addr, block_offset, length); + return rdma_add_block(opaque, block_name, host_addr, block_offset, length); } /* @@ -587,7 +614,6 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma) RDMALocalBlocks *local = &rdma->local_ram_blocks; assert(rdma->blockmap == NULL); - rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal); memset(local, 0, sizeof *local); qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma); trace_qemu_rdma_init_ram_blocks(local->nb_blocks); @@ -597,16 +623,19 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma) return 0; } -static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset) +/* + * Note: If used outside of cleanup, the caller must ensure that the destination + * block structures are also updated + */ +static int rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block) { RDMALocalBlocks *local = &rdma->local_ram_blocks; - RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap, - (void *) block_offset); RDMALocalBlock *old = local->block; int x; - assert(block); - + if (rdma->blockmap) { + g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)block->offset); + } if (block->pmr) { int j; @@ -636,8 +665,14 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset) g_free(block->remote_keys); block->remote_keys = NULL; - for (x = 0; x < local->nb_blocks; x++) { - g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)old[x].offset); + g_free(block->block_name); + block->block_name = NULL; + + if (rdma->blockmap) { + for (x = 0; x < local->nb_blocks; x++) { + g_hash_table_remove(rdma->blockmap, + (void *)(uintptr_t)old[x].offset); + } } if (local->nb_blocks > 1) { @@ -659,8 +694,7 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset) local->block = NULL; } - trace_rdma_delete_block(local->nb_blocks, - (uintptr_t)block->local_host_addr, + trace_rdma_delete_block(block, (uintptr_t)block->local_host_addr, block->offset, block->length, (uintptr_t)(block->local_host_addr + block->length), BITS_TO_LONGS(block->nb_chunks) * @@ -670,7 +704,7 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset) local->nb_blocks--; - if (local->nb_blocks) { + if (local->nb_blocks && rdma->blockmap) { for (x = 0; x < local->nb_blocks; x++) { g_hash_table_insert(rdma->blockmap, (void *)(uintptr_t)local->block[x].offset, @@ -1223,7 +1257,7 @@ const char *print_wrid(int wrid) /* * Perform a non-optimized memory unregistration after every transfer - * for demonsration purposes, only if pin-all is not requested. + * for demonstration purposes, only if pin-all is not requested. * * Potential optimizations: * 1. Start a new thread to run this function continuously @@ -1289,7 +1323,7 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma) rdma->total_registrations--; reg.key.chunk = chunk; - register_to_network(®); + register_to_network(rdma, ®); ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) ®, &resp, NULL, NULL); if (ret < 0) { @@ -1910,7 +1944,7 @@ retry: trace_qemu_rdma_write_one_zero(chunk, sge.length, current_index, current_addr); - compress_to_network(&comp); + compress_to_network(rdma, &comp); ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &comp, NULL, NULL, NULL); @@ -1937,7 +1971,7 @@ retry: trace_qemu_rdma_write_one_sendreg(chunk, sge.length, current_index, current_addr); - register_to_network(®); + register_to_network(rdma, ®); ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) ®, &resp, ®_result_idx, NULL); if (ret < 0) { @@ -2198,7 +2232,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) if (rdma->local_ram_blocks.block) { while (rdma->local_ram_blocks.nb_blocks) { - rdma_delete_block(rdma, rdma->local_ram_blocks.block->offset); + rdma_delete_block(rdma, &rdma->local_ram_blocks.block[0]); } } @@ -2271,6 +2305,14 @@ static int qemu_rdma_source_init(RDMAContext *rdma, Error **errp, bool pin_all) goto err_rdma_source_init; } + /* Build the hash that maps from offset to RAMBlock */ + rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal); + for (idx = 0; idx < rdma->local_ram_blocks.nb_blocks; idx++) { + g_hash_table_insert(rdma->blockmap, + (void *)(uintptr_t)rdma->local_ram_blocks.block[idx].offset, + &rdma->local_ram_blocks.block[idx]); + } + for (idx = 0; idx < RDMA_WRID_MAX; idx++) { ret = qemu_rdma_reg_control(rdma, idx); if (ret) { @@ -2880,6 +2922,14 @@ err_rdma_dest_wait: return ret; } +static int dest_ram_sort_func(const void *a, const void *b) +{ + unsigned int a_index = ((const RDMALocalBlock *)a)->src_index; + unsigned int b_index = ((const RDMALocalBlock *)b)->src_index; + + return (a_index < b_index) ? -1 : (a_index != b_index); +} + /* * During each iteration of the migration, we listen for instructions * by the source VM to perform dynamic page registrations before they @@ -2889,8 +2939,7 @@ err_rdma_dest_wait: * * Keep doing this until the source tells us to stop. */ -static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, - uint64_t flags) +static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque) { RDMAControlHeader reg_resp = { .len = sizeof(RDMARegisterResult), .type = RDMA_CONTROL_REGISTER_RESULT, @@ -2920,7 +2969,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, CHECK_ERROR_STATE(); do { - trace_qemu_rdma_registration_handle_wait(flags); + trace_qemu_rdma_registration_handle_wait(); ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_NONE); @@ -2943,6 +2992,13 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, trace_qemu_rdma_registration_handle_compress(comp->length, comp->block_idx, comp->offset); + if (comp->block_idx >= rdma->local_ram_blocks.nb_blocks) { + error_report("rdma: 'compress' bad block index %u (vs %d)", + (unsigned int)comp->block_idx, + rdma->local_ram_blocks.nb_blocks); + ret = -EIO; + break; + } block = &(rdma->local_ram_blocks.block[comp->block_idx]); host_addr = block->local_host_addr + @@ -2958,6 +3014,13 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, case RDMA_CONTROL_RAM_BLOCKS_REQUEST: trace_qemu_rdma_registration_handle_ram_blocks(); + /* Sort our local RAM Block list so it's the same as the source, + * we can do this since we've filled in a src_index in the list + * as we received the RAMBlock list earlier. + */ + qsort(rdma->local_ram_blocks.block, + rdma->local_ram_blocks.nb_blocks, + sizeof(RDMALocalBlock), dest_ram_sort_func); if (rdma->pin_all) { ret = qemu_rdma_reg_whole_ram_blocks(rdma); if (ret) { @@ -2985,6 +3048,12 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, rdma->dest_blocks[i].length = local->block[i].length; dest_block_to_network(&rdma->dest_blocks[i]); + trace_qemu_rdma_registration_handle_ram_blocks_loop( + local->block[i].block_name, + local->block[i].offset, + local->block[i].length, + local->block[i].local_host_addr, + local->block[i].src_index); } blocks.len = rdma->local_ram_blocks.nb_blocks @@ -3018,8 +3087,23 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, trace_qemu_rdma_registration_handle_register_loop(count, reg->current_index, reg->key.current_addr, reg->chunks); + if (reg->current_index >= rdma->local_ram_blocks.nb_blocks) { + error_report("rdma: 'register' bad block index %u (vs %d)", + (unsigned int)reg->current_index, + rdma->local_ram_blocks.nb_blocks); + ret = -ENOENT; + break; + } block = &(rdma->local_ram_blocks.block[reg->current_index]); if (block->is_ram_block) { + if (block->offset > reg->key.current_addr) { + error_report("rdma: bad register address for block %s" + " offset: %" PRIx64 " current_addr: %" PRIx64, + block->block_name, block->offset, + reg->key.current_addr); + ret = -ERANGE; + break; + } host_addr = (block->local_host_addr + (reg->key.current_addr - block->offset)); chunk = ram_chunk_index(block->local_host_addr, @@ -3028,6 +3112,14 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, chunk = reg->key.chunk; host_addr = block->local_host_addr + (reg->key.chunk * (1UL << RDMA_REG_CHUNK_SHIFT)); + /* Check for particularly bad chunk value */ + if (host_addr < (void *)block->local_host_addr) { + error_report("rdma: bad chunk for block %s" + " chunk: %" PRIx64, + block->block_name, reg->key.chunk); + ret = -ERANGE; + break; + } } chunk_start = ram_chunk_start(block, chunk); chunk_end = ram_chunk_end(block, chunk + reg->chunks); @@ -3108,8 +3200,56 @@ out: return ret; } +/* Destination: + * Called via a ram_control_load_hook during the initial RAM load section which + * lists the RAMBlocks by name. This lets us know the order of the RAMBlocks + * on the source. + * We've already built our local RAMBlock list, but not yet sent the list to + * the source. + */ +static int rdma_block_notification_handle(QEMUFileRDMA *rfile, const char *name) +{ + RDMAContext *rdma = rfile->rdma; + int curr; + int found = -1; + + /* Find the matching RAMBlock in our local list */ + for (curr = 0; curr < rdma->local_ram_blocks.nb_blocks; curr++) { + if (!strcmp(rdma->local_ram_blocks.block[curr].block_name, name)) { + found = curr; + break; + } + } + + if (found == -1) { + error_report("RAMBlock '%s' not found on destination", name); + return -ENOENT; + } + + rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index; + trace_rdma_block_notification_handle(name, rdma->next_src_index); + rdma->next_src_index++; + + return 0; +} + +static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data) +{ + switch (flags) { + case RAM_CONTROL_BLOCK_REG: + return rdma_block_notification_handle(opaque, data); + + case RAM_CONTROL_HOOK: + return qemu_rdma_registration_handle(f, opaque); + + default: + /* Shouldn't be called with any other values */ + abort(); + } +} + static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, - uint64_t flags) + uint64_t flags, void *data) { QEMUFileRDMA *rfile = opaque; RDMAContext *rdma = rfile->rdma; @@ -3128,7 +3268,7 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, * First, flush writes, if any. */ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, - uint64_t flags) + uint64_t flags, void *data) { Error *local_err = NULL, **errp = &local_err; QEMUFileRDMA *rfile = opaque; @@ -3148,7 +3288,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, if (flags == RAM_CONTROL_SETUP) { RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT }; RDMALocalBlocks *local = &rdma->local_ram_blocks; - int reg_result_idx, i, j, nb_dest_blocks; + int reg_result_idx, i, nb_dest_blocks; head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST; trace_qemu_rdma_registration_stop_ram(); @@ -3184,9 +3324,11 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, */ if (local->nb_blocks != nb_dest_blocks) { - ERROR(errp, "ram blocks mismatch #1! " + ERROR(errp, "ram blocks mismatch (Number of blocks %d vs %d) " "Your QEMU command line parameters are probably " - "not identical on both the source and destination."); + "not identical on both the source and destination.", + local->nb_blocks, nb_dest_blocks); + rdma->error_state = -EINVAL; return -EINVAL; } @@ -3196,30 +3338,18 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, for (i = 0; i < nb_dest_blocks; i++) { network_to_dest_block(&rdma->dest_blocks[i]); - /* search local ram blocks */ - for (j = 0; j < local->nb_blocks; j++) { - if (rdma->dest_blocks[i].offset != local->block[j].offset) { - continue; - } - - if (rdma->dest_blocks[i].length != local->block[j].length) { - ERROR(errp, "ram blocks mismatch #2! " - "Your QEMU command line parameters are probably " - "not identical on both the source and destination."); - return -EINVAL; - } - local->block[j].remote_host_addr = - rdma->dest_blocks[i].remote_host_addr; - local->block[j].remote_rkey = rdma->dest_blocks[i].remote_rkey; - break; - } - - if (j >= local->nb_blocks) { - ERROR(errp, "ram blocks mismatch #3! " - "Your QEMU command line parameters are probably " - "not identical on both the source and destination."); + /* We require that the blocks are in the same order */ + if (rdma->dest_blocks[i].length != local->block[i].length) { + ERROR(errp, "Block %s/%d has a different length %" PRIu64 + "vs %" PRIu64, local->block[i].block_name, i, + local->block[i].length, + rdma->dest_blocks[i].length); + rdma->error_state = -EINVAL; return -EINVAL; } + local->block[i].remote_host_addr = + rdma->dest_blocks[i].remote_host_addr; + local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey; } } @@ -3250,7 +3380,7 @@ static const QEMUFileOps rdma_read_ops = { .get_buffer = qemu_rdma_get_buffer, .get_fd = qemu_rdma_get_fd, .close = qemu_rdma_close, - .hook_ram_load = qemu_rdma_registration_handle, + .hook_ram_load = rdma_load_hook, }; static const QEMUFileOps rdma_write_ops = { @@ -3263,12 +3393,13 @@ static const QEMUFileOps rdma_write_ops = { static void *qemu_fopen_rdma(RDMAContext *rdma, const char *mode) { - QEMUFileRDMA *r = g_malloc0(sizeof(QEMUFileRDMA)); + QEMUFileRDMA *r; if (qemu_file_mode_is_not_valid(mode)) { return NULL; } + r = g_malloc0(sizeof(QEMUFileRDMA)); r->rdma = rdma; if (mode[0] == 'w') { @@ -3287,7 +3418,7 @@ static void rdma_accept_incoming_migration(void *opaque) QEMUFile *f; Error *local_err = NULL, **errp = &local_err; - trace_qemu_dma_accept_incoming_migration(); + trace_qemu_rdma_accept_incoming_migration(); ret = qemu_rdma_accept(rdma); if (ret) { @@ -3295,7 +3426,7 @@ static void rdma_accept_incoming_migration(void *opaque) return; } - trace_qemu_dma_accept_incoming_migration_accepted(); + trace_qemu_rdma_accept_incoming_migration_accepted(); f = qemu_fopen_rdma(rdma, "rb"); if (f == NULL) { diff --git a/migration/savevm.c b/migration/savevm.c index 9e0e286797..86735fc53a 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -246,11 +246,55 @@ typedef struct SaveStateEntry { typedef struct SaveState { QTAILQ_HEAD(, SaveStateEntry) handlers; int global_section_id; + bool skip_configuration; + uint32_t len; + const char *name; } SaveState; static SaveState savevm_state = { .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers), .global_section_id = 0, + .skip_configuration = false, +}; + +void savevm_skip_configuration(void) +{ + savevm_state.skip_configuration = true; +} + + +static void configuration_pre_save(void *opaque) +{ + SaveState *state = opaque; + const char *current_name = MACHINE_GET_CLASS(current_machine)->name; + + state->len = strlen(current_name); + state->name = current_name; +} + +static int configuration_post_load(void *opaque, int version_id) +{ + SaveState *state = opaque; + const char *current_name = MACHINE_GET_CLASS(current_machine)->name; + + if (strncmp(state->name, current_name, state->len) != 0) { + error_report("Machine type received is '%s' and local is '%s'", + state->name, current_name); + return -EINVAL; + } + return 0; +} + +static const VMStateDescription vmstate_configuration = { + .name = "configuration", + .version_id = 1, + .post_load = configuration_post_load, + .pre_save = configuration_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT32(len, SaveState), + VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len), + VMSTATE_END_OF_LIST() + }, }; static void dump_vmstate_vmsd(FILE *out_file, @@ -653,41 +697,6 @@ static void save_section_footer(QEMUFile *f, SaveStateEntry *se) } } -/* - * Read a footer off the wire and check that it matches the expected section - * - * Returns: true if the footer was good - * false if there is a problem (and calls error_report to say why) - */ -static bool check_section_footer(QEMUFile *f, SaveStateEntry *se) -{ - uint8_t read_mark; - uint32_t read_section_id; - - if (skip_section_footers) { - /* No footer to check */ - return true; - } - - read_mark = qemu_get_byte(f); - - if (read_mark != QEMU_VM_SECTION_FOOTER) { - error_report("Missing section footer for %s", se->idstr); - return false; - } - - read_section_id = qemu_get_be32(f); - if (read_section_id != se->section_id) { - error_report("Mismatched section id in footer for %s -" - " read 0x%x expected 0x%x", - se->idstr, read_section_id, se->section_id); - return false; - } - - /* All good */ - return true; -} - bool qemu_savevm_state_blocked(Error **errp) { SaveStateEntry *se; @@ -723,6 +732,11 @@ void qemu_savevm_state_begin(QEMUFile *f, se->ops->set_params(params, se->opaque); } + if (!savevm_state.skip_configuration) { + qemu_put_byte(f, QEMU_VM_CONFIGURATION); + vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0); + } + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || !se->ops->save_live_setup) { continue; @@ -836,6 +850,11 @@ void qemu_savevm_state_complete(QEMUFile *f) if ((!se->ops || !se->ops->save_state) && !se->vmsd) { continue; } + if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) { + trace_savevm_section_skip(se->idstr, se->section_id); + continue; + } + trace_savevm_section_start(se->idstr, se->section_id); json_start_object(vmdesc, NULL); @@ -949,6 +968,9 @@ static int qemu_save_device_state(QEMUFile *f) if ((!se->ops || !se->ops->save_state) && !se->vmsd) { continue; } + if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) { + continue; + } save_section_header(f, se, QEMU_VM_SECTION_FULL); @@ -989,6 +1011,41 @@ struct LoadStateEntry { int version_id; }; +/* + * Read a footer off the wire and check that it matches the expected section + * + * Returns: true if the footer was good + * false if there is a problem (and calls error_report to say why) + */ +static bool check_section_footer(QEMUFile *f, LoadStateEntry *le) +{ + uint8_t read_mark; + uint32_t read_section_id; + + if (skip_section_footers) { + /* No footer to check */ + return true; + } + + read_mark = qemu_get_byte(f); + + if (read_mark != QEMU_VM_SECTION_FOOTER) { + error_report("Missing section footer for %s", le->se->idstr); + return false; + } + + read_section_id = qemu_get_be32(f); + if (read_section_id != le->section_id) { + error_report("Mismatched section id in footer for %s -" + " read 0x%x expected 0x%x", + le->se->idstr, read_section_id, le->section_id); + return false; + } + + /* All good */ + return true; +} + void loadvm_free_handlers(MigrationIncomingState *mis) { LoadStateEntry *le, *new_le; @@ -1029,6 +1086,18 @@ int qemu_loadvm_state(QEMUFile *f) return -ENOTSUP; } + if (!savevm_state.skip_configuration) { + if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { + error_report("Configuration section missing"); + return -EINVAL; + } + ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); + + if (ret) { + return ret; + } + } + while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) { uint32_t instance_id, version_id, section_id; SaveStateEntry *se; @@ -1082,7 +1151,7 @@ int qemu_loadvm_state(QEMUFile *f) " device '%s'", instance_id, idstr); goto out; } - if (!check_section_footer(f, le->se)) { + if (!check_section_footer(f, le)) { ret = -EINVAL; goto out; } @@ -1109,7 +1178,7 @@ int qemu_loadvm_state(QEMUFile *f) section_id, le->se->idstr); goto out; } - if (!check_section_footer(f, le->se)) { + if (!check_section_footer(f, le)) { ret = -EINVAL; goto out; } @@ -1127,16 +1196,35 @@ int qemu_loadvm_state(QEMUFile *f) * Try to read in the VMDESC section as well, so that dumping tools that * intercept our migration stream have the chance to see it. */ - if (qemu_get_byte(f) == QEMU_VM_VMDESCRIPTION) { - uint32_t size = qemu_get_be32(f); - uint8_t *buf = g_malloc(0x1000); - - while (size > 0) { - uint32_t read_chunk = MIN(size, 0x1000); - qemu_get_buffer(f, buf, read_chunk); - size -= read_chunk; + + /* We've got to be careful; if we don't read the data and just shut the fd + * then the sender can error if we close while it's still sending. + * We also mustn't read data that isn't there; some transports (RDMA) + * will stall waiting for that data when the source has already closed. + */ + if (should_send_vmdesc()) { + uint8_t *buf; + uint32_t size; + section_type = qemu_get_byte(f); + + if (section_type != QEMU_VM_VMDESCRIPTION) { + error_report("Expected vmdescription section, but got %d", + section_type); + /* + * It doesn't seem worth failing at this point since + * we apparently have an otherwise valid VM state + */ + } else { + buf = g_malloc(0x1000); + size = qemu_get_be32(f); + + while (size > 0) { + uint32_t read_chunk = MIN(size, 0x1000); + qemu_get_buffer(f, buf, read_chunk); + size -= read_chunk; + } + g_free(buf); } - g_free(buf); } cpu_synchronize_all_post_init(); diff --git a/migration/vmstate.c b/migration/vmstate.c index 6138d1acb7..e8ccf22f67 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -276,6 +276,17 @@ static void vmsd_desc_field_end(const VMStateDescription *vmsd, QJSON *vmdesc, json_end_object(vmdesc); } + +bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque) +{ + if (vmsd->needed && !vmsd->needed(opaque)) { + /* optional section not needed */ + return false; + } + return true; +} + + void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, QJSON *vmdesc) { diff --git a/pc-bios/README b/pc-bios/README index 63e725444d..05cf0421be 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -17,7 +17,7 @@ - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware implementation for certain IBM POWER hardware. The sources are at https://github.com/aik/SLOF, and the image currently in qemu is - built from git tag qemu-slof-20150313. + built from git tag qemu-slof-20150429. - sgabios (the Serial Graphics Adapter option ROM) provides a means for legacy x86 software to communicate with an attached serial console as diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin Binary files differindex ab72cba80c..0398ac67bc 100644 --- a/pc-bios/slof.bin +++ b/pc-bios/slof.bin diff --git a/qapi-schema.json b/qapi-schema.json index 106008cdeb..1285b8c74a 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -523,6 +523,9 @@ # minimize migration traffic. The feature is disabled by default. # (since 2.4 ) # +# @events: generate events for each migration state change +# (since 2.4 ) +# # @auto-converge: If enabled, QEMU will automatically throttle down the guest # to speed up convergence of RAM migration. (since 1.6) # @@ -530,7 +533,7 @@ ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', - 'compress'] } + 'compress', 'events'] } ## # @MigrationCapabilityStatus diff --git a/qapi/event.json b/qapi/event.json index 378dda572a..f0cef010f0 100644 --- a/qapi/event.json +++ b/qapi/event.json @@ -243,6 +243,18 @@ { 'event': 'SPICE_MIGRATE_COMPLETED' } ## +# @MIGRATION +# +# Emitted when a migration event happens +# +# @status: @MigrationStatus describing the current migration status. +# +# Since: 2.4 +## +{ 'event': 'MIGRATION', + 'data': {'status': 'MigrationStatus'}} + +## # @ACPI_DEVICE_OST # # Emitted when guest executes ACPI _OST method. diff --git a/qga/commands-posix.c b/qga/commands-posix.c index befd00b00d..675f4b4c66 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -154,6 +154,8 @@ void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp) /* If user has passed a time, validate and set it. */ if (has_time) { + GDate date = { 0, }; + /* year-2038 will overflow in case time_t is 32bit */ if (time_ns / 1000000000 != (time_t)(time_ns / 1000000000)) { error_setg(errp, "Time %" PRId64 " is too large", time_ns); @@ -162,6 +164,11 @@ void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp) tv.tv_sec = time_ns / 1000000000; tv.tv_usec = (time_ns % 1000000000) / 1000; + g_date_set_time_t(&date, tv.tv_sec); + if (date.year < 1970 || date.year >= 2070) { + error_setg_errno(errp, errno, "Invalid time"); + return; + } ret = settimeofday(&tv, NULL); if (ret < 0) { @@ -1325,18 +1332,18 @@ static void guest_fsfreeze_cleanup(void) /* * Walk list of mounted file systems in the guest, and trim them. */ -void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp) +GuestFilesystemTrimResponse * +qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp) { + GuestFilesystemTrimResponse *response; + GuestFilesystemTrimResultList *list; + GuestFilesystemTrimResult *result; int ret = 0; FsMountList mounts; struct FsMount *mount; int fd; Error *local_err = NULL; - struct fstrim_range r = { - .start = 0, - .len = -1, - .minlen = has_minimum ? minimum : 0, - }; + struct fstrim_range r; slog("guest-fstrim called"); @@ -1344,36 +1351,59 @@ void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp) build_fs_mount_list(&mounts, &local_err); if (local_err) { error_propagate(errp, local_err); - return; + return NULL; } + response = g_malloc0(sizeof(*response)); + QTAILQ_FOREACH(mount, &mounts, next) { + result = g_malloc0(sizeof(*result)); + result->path = g_strdup(mount->dirname); + + list = g_malloc0(sizeof(*list)); + list->value = result; + list->next = response->paths; + response->paths = list; + fd = qemu_open(mount->dirname, O_RDONLY); if (fd == -1) { - error_setg_errno(errp, errno, "failed to open %s", mount->dirname); - goto error; + result->error = g_strdup_printf("failed to open: %s", + strerror(errno)); + result->has_error = true; + continue; } /* We try to cull filesytems we know won't work in advance, but other * filesytems may not implement fstrim for less obvious reasons. These - * will report EOPNOTSUPP; we simply ignore these errors. Any other - * error means an unexpected error, so return it in those cases. In - * some other cases ENOTTY will be reported (e.g. CD-ROMs). + * will report EOPNOTSUPP; while in some other cases ENOTTY will be + * reported (e.g. CD-ROMs). + * Any other error means an unexpected error. */ + r.start = 0; + r.len = -1; + r.minlen = has_minimum ? minimum : 0; ret = ioctl(fd, FITRIM, &r); if (ret == -1) { - if (errno != ENOTTY && errno != EOPNOTSUPP) { - error_setg_errno(errp, errno, "failed to trim %s", - mount->dirname); - close(fd); - goto error; + result->has_error = true; + if (errno == ENOTTY || errno == EOPNOTSUPP) { + result->error = g_strdup("trim not supported"); + } else { + result->error = g_strdup_printf("failed to trim: %s", + strerror(errno)); } + close(fd); + continue; } + + result->has_minimum = true; + result->minimum = r.minlen; + result->has_trimmed = true; + result->trimmed = r.len; close(fd); } -error: free_fs_mount_list(&mounts); + return response; } #endif /* CONFIG_FSTRIM */ @@ -2402,9 +2432,11 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp) #endif /* CONFIG_FSFREEZE */ #if !defined(CONFIG_FSTRIM) -void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp) +GuestFilesystemTrimResponse * +qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp) { error_setg(errp, QERR_UNSUPPORTED); + return NULL; } #endif diff --git a/qga/commands-win32.c b/qga/commands-win32.c index fbddc8b1b2..a7822d5ff7 100644 --- a/qga/commands-win32.c +++ b/qga/commands-win32.c @@ -16,11 +16,22 @@ #include <powrprof.h> #include <stdio.h> #include <string.h> +#include <winsock2.h> +#include <ws2tcpip.h> +#include <iptypes.h> +#include <iphlpapi.h> +#ifdef CONFIG_QGA_NTDDSCSI +#include <winioctl.h> +#include <ntddscsi.h> +#include <setupapi.h> +#include <initguid.h> +#endif #include "qga/guest-agent-core.h" #include "qga/vss-win32.h" #include "qga-qmp-commands.h" #include "qapi/qmp/qerror.h" #include "qemu/queue.h" +#include "qemu/host-utils.h" #ifndef SHTDN_REASON_FLAG_PLANNED #define SHTDN_REASON_FLAG_PLANNED 0x80000000 @@ -382,12 +393,307 @@ static void guest_file_init(void) QTAILQ_INIT(&guest_file_state.filehandles); } -GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp) +#ifdef CONFIG_QGA_NTDDSCSI + +static STORAGE_BUS_TYPE win2qemu[] = { + [BusTypeUnknown] = GUEST_DISK_BUS_TYPE_UNKNOWN, + [BusTypeScsi] = GUEST_DISK_BUS_TYPE_SCSI, + [BusTypeAtapi] = GUEST_DISK_BUS_TYPE_IDE, + [BusTypeAta] = GUEST_DISK_BUS_TYPE_IDE, + [BusType1394] = GUEST_DISK_BUS_TYPE_IEEE1394, + [BusTypeSsa] = GUEST_DISK_BUS_TYPE_SSA, + [BusTypeFibre] = GUEST_DISK_BUS_TYPE_SSA, + [BusTypeUsb] = GUEST_DISK_BUS_TYPE_USB, + [BusTypeRAID] = GUEST_DISK_BUS_TYPE_RAID, +#if (_WIN32_WINNT >= 0x0600) + [BusTypeiScsi] = GUEST_DISK_BUS_TYPE_ISCSI, + [BusTypeSas] = GUEST_DISK_BUS_TYPE_SAS, + [BusTypeSata] = GUEST_DISK_BUS_TYPE_SATA, + [BusTypeSd] = GUEST_DISK_BUS_TYPE_SD, + [BusTypeMmc] = GUEST_DISK_BUS_TYPE_MMC, +#endif +#if (_WIN32_WINNT >= 0x0601) + [BusTypeVirtual] = GUEST_DISK_BUS_TYPE_VIRTUAL, + [BusTypeFileBackedVirtual] = GUEST_DISK_BUS_TYPE_FILE_BACKED_VIRTUAL, +#endif +}; + +static GuestDiskBusType find_bus_type(STORAGE_BUS_TYPE bus) +{ + if (bus > ARRAY_SIZE(win2qemu) || (int)bus < 0) { + return GUEST_DISK_BUS_TYPE_UNKNOWN; + } + return win2qemu[(int)bus]; +} + +DEFINE_GUID(GUID_DEVINTERFACE_VOLUME, + 0x53f5630dL, 0xb6bf, 0x11d0, 0x94, 0xf2, + 0x00, 0xa0, 0xc9, 0x1e, 0xfb, 0x8b); + +static GuestPCIAddress *get_pci_info(char *guid, Error **errp) +{ + HDEVINFO dev_info; + SP_DEVINFO_DATA dev_info_data; + DWORD size = 0; + int i; + char dev_name[MAX_PATH]; + char *buffer = NULL; + GuestPCIAddress *pci = NULL; + char *name = g_strdup(&guid[4]); + + if (!QueryDosDevice(name, dev_name, ARRAY_SIZE(dev_name))) { + error_setg_win32(errp, GetLastError(), "failed to get dos device name"); + goto out; + } + + dev_info = SetupDiGetClassDevs(&GUID_DEVINTERFACE_VOLUME, 0, 0, + DIGCF_PRESENT | DIGCF_DEVICEINTERFACE); + if (dev_info == INVALID_HANDLE_VALUE) { + error_setg_win32(errp, GetLastError(), "failed to get devices tree"); + goto out; + } + + dev_info_data.cbSize = sizeof(SP_DEVINFO_DATA); + for (i = 0; SetupDiEnumDeviceInfo(dev_info, i, &dev_info_data); i++) { + DWORD addr, bus, slot, func, dev, data, size2; + while (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data, + SPDRP_PHYSICAL_DEVICE_OBJECT_NAME, + &data, (PBYTE)buffer, size, + &size2)) { + size = MAX(size, size2); + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { + g_free(buffer); + /* Double the size to avoid problems on + * W2k MBCS systems per KB 888609. + * https://support.microsoft.com/en-us/kb/259695 */ + buffer = g_malloc(size * 2); + } else { + error_setg_win32(errp, GetLastError(), + "failed to get device name"); + goto out; + } + } + + if (g_strcmp0(buffer, dev_name)) { + continue; + } + + /* There is no need to allocate buffer in the next functions. The size + * is known and ULONG according to + * https://support.microsoft.com/en-us/kb/253232 + * https://msdn.microsoft.com/en-us/library/windows/hardware/ff543095(v=vs.85).aspx + */ + if (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data, + SPDRP_BUSNUMBER, &data, (PBYTE)&bus, size, NULL)) { + break; + } + + /* The function retrieves the device's address. This value will be + * transformed into device function and number */ + if (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data, + SPDRP_ADDRESS, &data, (PBYTE)&addr, size, NULL)) { + break; + } + + /* This call returns UINumber of DEVICE_CAPABILITIES structure. + * This number is typically a user-perceived slot number. */ + if (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data, + SPDRP_UI_NUMBER, &data, (PBYTE)&slot, size, NULL)) { + break; + } + + /* SetupApi gives us the same information as driver with + * IoGetDeviceProperty. According to Microsoft + * https://support.microsoft.com/en-us/kb/253232 + * FunctionNumber = (USHORT)((propertyAddress) & 0x0000FFFF); + * DeviceNumber = (USHORT)(((propertyAddress) >> 16) & 0x0000FFFF); + * SPDRP_ADDRESS is propertyAddress, so we do the same.*/ + + func = addr & 0x0000FFFF; + dev = (addr >> 16) & 0x0000FFFF; + pci = g_malloc0(sizeof(*pci)); + pci->domain = dev; + pci->slot = slot; + pci->function = func; + pci->bus = bus; + break; + } +out: + g_free(buffer); + g_free(name); + return pci; +} + +static int get_disk_bus_type(HANDLE vol_h, Error **errp) +{ + STORAGE_PROPERTY_QUERY query; + STORAGE_DEVICE_DESCRIPTOR *dev_desc, buf; + DWORD received; + + dev_desc = &buf; + dev_desc->Size = sizeof(buf); + query.PropertyId = StorageDeviceProperty; + query.QueryType = PropertyStandardQuery; + + if (!DeviceIoControl(vol_h, IOCTL_STORAGE_QUERY_PROPERTY, &query, + sizeof(STORAGE_PROPERTY_QUERY), dev_desc, + dev_desc->Size, &received, NULL)) { + error_setg_win32(errp, GetLastError(), "failed to get bus type"); + return -1; + } + + return dev_desc->BusType; +} + +/* VSS provider works with volumes, thus there is no difference if + * the volume consist of spanned disks. Info about the first disk in the + * volume is returned for the spanned disk group (LVM) */ +static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp) +{ + GuestDiskAddressList *list = NULL; + GuestDiskAddress *disk; + SCSI_ADDRESS addr, *scsi_ad; + DWORD len; + int bus; + HANDLE vol_h; + + scsi_ad = &addr; + char *name = g_strndup(guid, strlen(guid)-1); + + vol_h = CreateFile(name, 0, FILE_SHARE_READ, NULL, OPEN_EXISTING, + 0, NULL); + if (vol_h == INVALID_HANDLE_VALUE) { + error_setg_win32(errp, GetLastError(), "failed to open volume"); + goto out_free; + } + + bus = get_disk_bus_type(vol_h, errp); + if (bus < 0) { + goto out_close; + } + + disk = g_malloc0(sizeof(*disk)); + disk->bus_type = find_bus_type(bus); + if (bus == BusTypeScsi || bus == BusTypeAta || bus == BusTypeRAID +#if (_WIN32_WINNT >= 0x0600) + /* This bus type is not supported before Windows Server 2003 SP1 */ + || bus == BusTypeSas +#endif + ) { + /* We are able to use the same ioctls for different bus types + * according to Microsoft docs + * https://technet.microsoft.com/en-us/library/ee851589(v=ws.10).aspx */ + if (DeviceIoControl(vol_h, IOCTL_SCSI_GET_ADDRESS, NULL, 0, scsi_ad, + sizeof(SCSI_ADDRESS), &len, NULL)) { + disk->unit = addr.Lun; + disk->target = addr.TargetId; + disk->bus = addr.PathId; + disk->pci_controller = get_pci_info(name, errp); + } + /* We do not set error in this case, because we still have enough + * information about volume. */ + } else { + disk->pci_controller = NULL; + } + + list = g_malloc0(sizeof(*list)); + list->value = disk; + list->next = NULL; +out_close: + CloseHandle(vol_h); +out_free: + g_free(name); + return list; +} + +#else + +static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp) { - error_setg(errp, QERR_UNSUPPORTED); return NULL; } +#endif /* CONFIG_QGA_NTDDSCSI */ + +static GuestFilesystemInfo *build_guest_fsinfo(char *guid, Error **errp) +{ + DWORD info_size; + char mnt, *mnt_point; + char fs_name[32]; + char vol_info[MAX_PATH+1]; + size_t len; + GuestFilesystemInfo *fs = NULL; + + GetVolumePathNamesForVolumeName(guid, (LPCH)&mnt, 0, &info_size); + if (GetLastError() != ERROR_MORE_DATA) { + error_setg_win32(errp, GetLastError(), "failed to get volume name"); + return NULL; + } + + mnt_point = g_malloc(info_size + 1); + if (!GetVolumePathNamesForVolumeName(guid, mnt_point, info_size, + &info_size)) { + error_setg_win32(errp, GetLastError(), "failed to get volume name"); + goto free; + } + + len = strlen(mnt_point); + mnt_point[len] = '\\'; + mnt_point[len+1] = 0; + if (!GetVolumeInformation(mnt_point, vol_info, sizeof(vol_info), NULL, NULL, + NULL, (LPSTR)&fs_name, sizeof(fs_name))) { + if (GetLastError() != ERROR_NOT_READY) { + error_setg_win32(errp, GetLastError(), "failed to get volume info"); + } + goto free; + } + + fs_name[sizeof(fs_name) - 1] = 0; + fs = g_malloc(sizeof(*fs)); + fs->name = g_strdup(guid); + if (len == 0) { + fs->mountpoint = g_strdup("System Reserved"); + } else { + fs->mountpoint = g_strndup(mnt_point, len); + } + fs->type = g_strdup(fs_name); + fs->disk = build_guest_disk_info(guid, errp);; +free: + g_free(mnt_point); + return fs; +} + +GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp) +{ + HANDLE vol_h; + GuestFilesystemInfoList *new, *ret = NULL; + char guid[256]; + + vol_h = FindFirstVolume(guid, sizeof(guid)); + if (vol_h == INVALID_HANDLE_VALUE) { + error_setg_win32(errp, GetLastError(), "failed to find any volume"); + return NULL; + } + + do { + GuestFilesystemInfo *info = build_guest_fsinfo(guid, errp); + if (info == NULL) { + continue; + } + new = g_malloc(sizeof(*ret)); + new->value = info; + new->next = ret; + ret = new; + } while (FindNextVolume(vol_h, guid, sizeof(guid))); + + if (GetLastError() != ERROR_NO_MORE_FILES) { + error_setg_win32(errp, GetLastError(), "failed to find next volume"); + } + + FindVolumeClose(vol_h); + return ret; +} + /* * Return status of freeze/thaw */ @@ -493,9 +799,11 @@ static void guest_fsfreeze_cleanup(void) * Walk list of mounted file systems in the guest, and discard unused * areas. */ -void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp) +GuestFilesystemTrimResponse * +qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp) { error_setg(errp, QERR_UNSUPPORTED); + return NULL; } typedef enum { @@ -589,12 +897,220 @@ void qmp_guest_suspend_hybrid(Error **errp) error_setg(errp, QERR_UNSUPPORTED); } -GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp) +static IP_ADAPTER_ADDRESSES *guest_get_adapters_addresses(Error **errp) { - error_setg(errp, QERR_UNSUPPORTED); + IP_ADAPTER_ADDRESSES *adptr_addrs = NULL; + ULONG adptr_addrs_len = 0; + DWORD ret; + + /* Call the first time to get the adptr_addrs_len. */ + GetAdaptersAddresses(AF_UNSPEC, GAA_FLAG_INCLUDE_PREFIX, + NULL, adptr_addrs, &adptr_addrs_len); + + adptr_addrs = g_malloc(adptr_addrs_len); + ret = GetAdaptersAddresses(AF_UNSPEC, GAA_FLAG_INCLUDE_PREFIX, + NULL, adptr_addrs, &adptr_addrs_len); + if (ret != ERROR_SUCCESS) { + error_setg_win32(errp, ret, "failed to get adapters addresses"); + g_free(adptr_addrs); + adptr_addrs = NULL; + } + return adptr_addrs; +} + +static char *guest_wctomb_dup(WCHAR *wstr) +{ + char *str; + size_t i; + + i = wcslen(wstr) + 1; + str = g_malloc(i); + WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, + wstr, -1, str, i, NULL, NULL); + return str; +} + +static char *guest_addr_to_str(IP_ADAPTER_UNICAST_ADDRESS *ip_addr, + Error **errp) +{ + char addr_str[INET6_ADDRSTRLEN + INET_ADDRSTRLEN]; + DWORD len; + int ret; + + if (ip_addr->Address.lpSockaddr->sa_family == AF_INET || + ip_addr->Address.lpSockaddr->sa_family == AF_INET6) { + len = sizeof(addr_str); + ret = WSAAddressToString(ip_addr->Address.lpSockaddr, + ip_addr->Address.iSockaddrLength, + NULL, + addr_str, + &len); + if (ret != 0) { + error_setg_win32(errp, WSAGetLastError(), + "failed address presentation form conversion"); + return NULL; + } + return g_strdup(addr_str); + } return NULL; } +#if (_WIN32_WINNT >= 0x0600) +static int64_t guest_ip_prefix(IP_ADAPTER_UNICAST_ADDRESS *ip_addr) +{ + /* For Windows Vista/2008 and newer, use the OnLinkPrefixLength + * field to obtain the prefix. + */ + return ip_addr->OnLinkPrefixLength; +} +#else +/* When using the Windows XP and 2003 build environment, do the best we can to + * figure out the prefix. + */ +static IP_ADAPTER_INFO *guest_get_adapters_info(void) +{ + IP_ADAPTER_INFO *adptr_info = NULL; + ULONG adptr_info_len = 0; + DWORD ret; + + /* Call the first time to get the adptr_info_len. */ + GetAdaptersInfo(adptr_info, &adptr_info_len); + + adptr_info = g_malloc(adptr_info_len); + ret = GetAdaptersInfo(adptr_info, &adptr_info_len); + if (ret != ERROR_SUCCESS) { + g_free(adptr_info); + adptr_info = NULL; + } + return adptr_info; +} + +static int64_t guest_ip_prefix(IP_ADAPTER_UNICAST_ADDRESS *ip_addr) +{ + int64_t prefix = -1; /* Use for AF_INET6 and unknown/undetermined values. */ + IP_ADAPTER_INFO *adptr_info, *info; + IP_ADDR_STRING *ip; + struct in_addr *p; + + if (ip_addr->Address.lpSockaddr->sa_family != AF_INET) { + return prefix; + } + adptr_info = guest_get_adapters_info(); + if (adptr_info == NULL) { + return prefix; + } + + /* Match up the passed in ip_addr with one found in adaptr_info. + * The matching one in adptr_info will have the netmask. + */ + p = &((struct sockaddr_in *)ip_addr->Address.lpSockaddr)->sin_addr; + for (info = adptr_info; info; info = info->Next) { + for (ip = &info->IpAddressList; ip; ip = ip->Next) { + if (p->S_un.S_addr == inet_addr(ip->IpAddress.String)) { + prefix = ctpop32(inet_addr(ip->IpMask.String)); + goto out; + } + } + } +out: + g_free(adptr_info); + return prefix; +} +#endif + +GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp) +{ + IP_ADAPTER_ADDRESSES *adptr_addrs, *addr; + IP_ADAPTER_UNICAST_ADDRESS *ip_addr = NULL; + GuestNetworkInterfaceList *head = NULL, *cur_item = NULL; + GuestIpAddressList *head_addr, *cur_addr; + GuestNetworkInterfaceList *info; + GuestIpAddressList *address_item = NULL; + unsigned char *mac_addr; + char *addr_str; + WORD wsa_version; + WSADATA wsa_data; + int ret; + + adptr_addrs = guest_get_adapters_addresses(errp); + if (adptr_addrs == NULL) { + return NULL; + } + + /* Make WSA APIs available. */ + wsa_version = MAKEWORD(2, 2); + ret = WSAStartup(wsa_version, &wsa_data); + if (ret != 0) { + error_setg_win32(errp, ret, "failed socket startup"); + goto out; + } + + for (addr = adptr_addrs; addr; addr = addr->Next) { + info = g_malloc0(sizeof(*info)); + + if (cur_item == NULL) { + head = cur_item = info; + } else { + cur_item->next = info; + cur_item = info; + } + + info->value = g_malloc0(sizeof(*info->value)); + info->value->name = guest_wctomb_dup(addr->FriendlyName); + + if (addr->PhysicalAddressLength != 0) { + mac_addr = addr->PhysicalAddress; + + info->value->hardware_address = + g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x", + (int) mac_addr[0], (int) mac_addr[1], + (int) mac_addr[2], (int) mac_addr[3], + (int) mac_addr[4], (int) mac_addr[5]); + + info->value->has_hardware_address = true; + } + + head_addr = NULL; + cur_addr = NULL; + for (ip_addr = addr->FirstUnicastAddress; + ip_addr; + ip_addr = ip_addr->Next) { + addr_str = guest_addr_to_str(ip_addr, errp); + if (addr_str == NULL) { + continue; + } + + address_item = g_malloc0(sizeof(*address_item)); + + if (!cur_addr) { + head_addr = cur_addr = address_item; + } else { + cur_addr->next = address_item; + cur_addr = address_item; + } + + address_item->value = g_malloc0(sizeof(*address_item->value)); + address_item->value->ip_address = addr_str; + address_item->value->prefix = guest_ip_prefix(ip_addr); + if (ip_addr->Address.lpSockaddr->sa_family == AF_INET) { + address_item->value->ip_address_type = + GUEST_IP_ADDRESS_TYPE_IPV4; + } else if (ip_addr->Address.lpSockaddr->sa_family == AF_INET6) { + address_item->value->ip_address_type = + GUEST_IP_ADDRESS_TYPE_IPV6; + } + } + if (head_addr) { + info->value->has_ip_addresses = true; + info->value->ip_addresses = head_addr; + } + } + WSACleanup(); +out: + g_free(adptr_addrs); + return head; +} + int64_t qmp_guest_get_time(Error **errp) { SYSTEMTIME ts = {0}; @@ -707,12 +1223,12 @@ GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp) GList *ga_command_blacklist_init(GList *blacklist) { const char *list_unsupported[] = { - "guest-suspend-hybrid", "guest-network-get-interfaces", + "guest-suspend-hybrid", "guest-get-vcpus", "guest-set-vcpus", "guest-set-user-password", "guest-get-memory-blocks", "guest-set-memory-blocks", "guest-get-memory-block-size", - "guest-fsfreeze-freeze-list", "guest-get-fsinfo", + "guest-fsfreeze-freeze-list", "guest-fstrim", NULL}; char **p = (char **)list_unsupported; diff --git a/qga/main.c b/qga/main.c index 23cde0104a..791982ef01 100644 --- a/qga/main.c +++ b/qga/main.c @@ -274,7 +274,7 @@ static void ga_log(const gchar *domain, GLogLevelFlags level, level &= G_LOG_LEVEL_MASK; #ifndef _WIN32 - if (domain && strcmp(domain, "syslog") == 0) { + if (g_strcmp0(domain, "syslog") == 0) { syslog(LOG_INFO, "%s: %s", level_str, msg); } else if (level & s->log_level) { #else diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json index b446dc729d..8a9b818d18 100644 --- a/qga/qapi-schema.json +++ b/qga/qapi-schema.json @@ -425,6 +425,30 @@ 'returns': 'int' } ## +# @GuestFilesystemTrimResult +# +# @path: path that was trimmed +# @error: an error message when trim failed +# @trimmed: bytes trimmed for this path +# @minimum: reported effective minimum for this path +# +# Since: 2.4 +## +{ 'struct': 'GuestFilesystemTrimResult', + 'data': {'path': 'str', + '*trimmed': 'int', '*minimum': 'int', '*error': 'str'} } + +## +# @GuestFilesystemTrimResponse +# +# @paths: list of @GuestFilesystemTrimResult per path that was trimmed +# +# Since: 2.4 +## +{ 'struct': 'GuestFilesystemTrimResponse', + 'data': {'paths': ['GuestFilesystemTrimResult']} } + +## # @guest-fstrim: # # Discard (or "trim") blocks which are not in use by the filesystem. @@ -437,12 +461,14 @@ # fragmented free space, although not all blocks will be discarded. # The default value is zero, meaning "discard every free block". # -# Returns: Nothing. +# Returns: A @GuestFilesystemTrimResponse which contains the +# status of all trimmed paths. (since 2.4) # # Since: 1.2 ## { 'command': 'guest-fstrim', - 'data': { '*minimum': 'int' } } + 'data': { '*minimum': 'int' }, + 'returns': 'GuestFilesystemTrimResponse' } ## # @guest-suspend-disk @@ -677,12 +703,24 @@ # @uml: UML disks # @sata: SATA disks # @sd: SD cards +# @unknown: Unknown bus type +# @ieee1394: Win IEEE 1394 bus type +# @ssa: Win SSA bus type +# @fibre: Win fiber channel bus type +# @raid: Win RAID bus type +# @iscsi: Win iScsi bus type +# @sas: Win serial-attaches SCSI bus type +# @mmc: Win multimedia card (MMC) bus type +# @virtual: Win virtual bus type +# @file-backed virtual: Win file-backed bus type # # Since: 2.2 ## { 'enum': 'GuestDiskBusType', 'data': [ 'ide', 'fdc', 'scsi', 'virtio', 'xen', 'usb', 'uml', 'sata', - 'sd' ] } + 'sd', 'unknown', 'ieee1394', 'ssa', 'fibre', 'raid', 'iscsi', + 'sas', 'mmc', 'virtual', 'file-backed-virtual' ] } + ## # @GuestPCIAddress: diff --git a/roms/SLOF b/roms/SLOF -Subproject c89b0df661c0a6bfa9ff0ed4a371f631f5ee38b +Subproject 7d766a3ac9b2474f6c7da0084d43590cbbf047b diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index baf4220b84..47378d93d4 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -31,7 +31,7 @@ fi cp_virtio() { from=$1 to=$2 - virtio=$(find "$from" -name '*virtio*h' -o -name "input.h") + virtio=$(find "$from" -name '*virtio*h' -o -name "input.h" -o -name "pci_regs.h") if [ "$virtio" ]; then rm -rf "$to" mkdir -p "$to" diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 36b07f99aa..b4f9461969 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -286,6 +286,17 @@ static const char *cpuid_xsave_feature_name[] = { NULL, NULL, NULL, NULL, }; +static const char *cpuid_6_feature_name[] = { + NULL, NULL, "arat", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +}; + #define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE) #define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \ CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC) @@ -341,6 +352,7 @@ static const char *cpuid_xsave_feature_name[] = { CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED */ #define TCG_APM_FEATURES 0 +#define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT typedef struct FeatureWordInfo { @@ -410,6 +422,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .cpuid_reg = R_EAX, .tcg_features = 0, }, + [FEAT_6_EAX] = { + .feat_names = cpuid_6_feature_name, + .cpuid_eax = 6, .cpuid_reg = R_EAX, + .tcg_features = TCG_6_EAX_FEATURES, + }, }; typedef struct X86RegisterInfo32 { @@ -1003,6 +1020,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, .xlevel = 0x8000000A, .model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)", }, @@ -1032,6 +1051,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT3_LAHF_LM, .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, .xlevel = 0x8000000A, .model_id = "Intel Xeon E312xx (Sandy Bridge)", }, @@ -1064,6 +1085,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT3_LAHF_LM, .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, .xlevel = 0x8000000A, .model_id = "Intel Xeon E3-12xx v2 (Ivy Bridge)", }, @@ -1098,6 +1121,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID, .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, .xlevel = 0x8000000A, .model_id = "Intel Core Processor (Haswell, no TSX)", }, { @@ -1132,6 +1157,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_RTM, .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, .xlevel = 0x8000000A, .model_id = "Intel Core Processor (Haswell)", }, @@ -1168,6 +1195,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_SMAP, .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, .xlevel = 0x8000000A, .model_id = "Intel Core Processor (Broadwell, no TSX)", }, @@ -1204,6 +1233,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_SMAP, .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, .xlevel = 0x8000000A, .model_id = "Intel Core Processor (Broadwell)", }, @@ -2359,7 +2390,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 6: /* Thermal and Power Leaf */ - *eax = 0; + *eax = env->features[FEAT_6_EAX]; *ebx = 0; *ecx = 0; *edx = 0; diff --git a/target-i386/cpu.h b/target-i386/cpu.h index ac39291b48..14dced08a7 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -415,6 +415,7 @@ typedef enum FeatureWord { FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ FEAT_SVM, /* CPUID[8000_000A].EDX */ FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ + FEAT_6_EAX, /* CPUID[6].EAX */ FEATURE_WORDS, } FeatureWord; @@ -580,6 +581,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_XSAVE_XGETBV1 (1U << 2) #define CPUID_XSAVE_XSAVES (1U << 3) +#define CPUID_6_EAX_ARAT (1U << 2) + /* CPUID[0x80000007].EDX flags: */ #define CPUID_APM_INVTSC (1U << 8) @@ -959,7 +962,7 @@ typedef struct CPUX86State { uint8_t has_error_code; uint32_t sipi_vector; bool tsc_valid; - int tsc_khz; + int64_t tsc_khz; void *kvm_xsave_buf; uint64_t mcg_cap; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 9038bf7077..066d03d99e 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -238,6 +238,8 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, if (!kvm_irqchip_in_kernel()) { ret &= ~CPUID_EXT_X2APIC; } + } else if (function == 6 && reg == R_EAX) { + ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */ } else if (function == 0x80000001 && reg == R_EDX) { /* On Intel, kvm returns cpuid according to the Intel spec, * so add missing bits according to the AMD spec: diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index ddf469fe09..110436d088 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -40,6 +40,7 @@ #include "trace.h" #include "exec/gdbstub.h" #include "exec/memattrs.h" +#include "sysemu/hostmem.h" //#define DEBUG_KVM @@ -303,16 +304,11 @@ static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) kvm_get_fallback_smmu_info(cpu, info); } -static long getrampagesize(void) +static long gethugepagesize(const char *mem_path) { struct statfs fs; int ret; - if (!mem_path) { - /* guest RAM is backed by normal anonymous pages */ - return getpagesize(); - } - do { ret = statfs(mem_path, &fs); } while (ret != 0 && errno == EINTR); @@ -334,6 +330,55 @@ static long getrampagesize(void) return fs.f_bsize; } +static int find_max_supported_pagesize(Object *obj, void *opaque) +{ + char *mem_path; + long *hpsize_min = opaque; + + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { + mem_path = object_property_get_str(obj, "mem-path", NULL); + if (mem_path) { + long hpsize = gethugepagesize(mem_path); + if (hpsize < *hpsize_min) { + *hpsize_min = hpsize; + } + } else { + *hpsize_min = getpagesize(); + } + } + + return 0; +} + +static long getrampagesize(void) +{ + long hpsize = LONG_MAX; + Object *memdev_root; + + if (mem_path) { + return gethugepagesize(mem_path); + } + + /* it's possible we have memory-backend objects with + * hugepage-backed RAM. these may get mapped into system + * address space via -numa parameters or memory hotplug + * hooks. we want to take these into account, but we + * also want to make sure these supported hugepage + * sizes are applicable across the entire range of memory + * we may boot from, so we take the min across all + * backends, and assume normal pages in cases where a + * backend isn't backed by hugepages. + */ + memdev_root = object_resolve_path("/objects", NULL); + if (!memdev_root) { + return getpagesize(); + } + + object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize); + + return (hpsize == LONG_MAX) ? getpagesize() : hpsize; +} + static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) { if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) { diff --git a/target-s390x/int_helper.c b/target-s390x/int_helper.c index 2c2b3f622c..a46c736d67 100644 --- a/target-s390x/int_helper.c +++ b/target-s390x/int_helper.c @@ -121,11 +121,12 @@ uint64_t HELPER(clz)(uint64_t v) return clz64(v); } -uint64_t HELPER(cvd)(int32_t bin) +uint64_t HELPER(cvd)(int32_t reg) { /* positive 0 */ uint64_t dec = 0x0c; - int shift = 4; + int64_t bin = reg; + int shift; if (bin < 0) { bin = -bin; @@ -133,9 +134,7 @@ uint64_t HELPER(cvd)(int32_t bin) } for (shift = 4; (shift < 64) && bin; shift += 4) { - int current_number = bin % 10; - - dec |= (current_number) << shift; + dec |= (bin % 10) << shift; bin /= 10; } diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c index 3ccbeb99e4..6f8bd796ad 100644 --- a/target-s390x/mem_helper.c +++ b/target-s390x/mem_helper.c @@ -482,6 +482,7 @@ uint32_t HELPER(ex)(CPUS390XState *env, uint32_t cc, uint64_t v1, case 0xc00: helper_tr(env, l, get_address(env, 0, b1, d1), get_address(env, 0, b2, d2)); + break; case 0xd00: cc = helper_trt(env, l, get_address(env, 0, b1, d1), get_address(env, 0, b2, d2)); @@ -550,7 +551,7 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) uint64_t dest = get_address_31fix(env, r1); uint64_t srclen = env->regs[r2 + 1] & 0xffffff; uint64_t src = get_address_31fix(env, r2); - uint8_t pad = src >> 24; + uint8_t pad = env->regs[r2 + 1] >> 24; uint8_t v; uint32_t cc; diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index 669fafe24f..921991ebfa 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -1643,8 +1643,10 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); - label_ptr = s->code_ptr + 1; - tcg_out_insn(s, RI, BRC, S390_CC_NE, 0); + /* We need to keep the offset unchanged for retranslation. */ + tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); + label_ptr = s->code_ptr; + s->code_ptr += 1; tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); @@ -1669,8 +1671,10 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); - label_ptr = s->code_ptr + 1; - tcg_out_insn(s, RI, BRC, S390_CC_NE, 0); + /* We need to keep the offset unchanged for retranslation. */ + tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); + label_ptr = s->code_ptr; + s->code_ptr += 1; tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); diff --git a/tests/Makefile b/tests/Makefile index 09838ac094..2c4b8dc5ec 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -137,6 +137,9 @@ check-qtest-pci-y += tests/display-vga-test$(EXESUF) gcov-files-pci-y += hw/display/vga.c gcov-files-pci-y += hw/display/cirrus_vga.c gcov-files-pci-y += hw/display/vga-pci.c +gcov-files-pci-y += hw/display/virtio-gpu.c +gcov-files-pci-y += hw/display/virtio-gpu-pci.c +gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c check-qtest-pci-y += tests/intel-hda-test$(EXESUF) gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c @@ -154,6 +157,7 @@ check-qtest-i386-y += tests/i440fx-test$(EXESUF) check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) check-qtest-i386-y += tests/drive_del-test$(EXESUF) check-qtest-i386-y += tests/wdt_ib700-test$(EXESUF) +check-qtest-i386-y += tests/tco-test$(EXESUF) gcov-files-i386-y += hw/watchdog/watchdog.c hw/watchdog/wdt_ib700.c check-qtest-i386-y += $(check-qtest-pci-y) gcov-files-i386-y += $(gcov-files-pci-y) @@ -374,6 +378,7 @@ tests/eepro100-test$(EXESUF): tests/eepro100-test.o tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o tests/ne2000-test$(EXESUF): tests/ne2000-test.o tests/wdt_ib700-test$(EXESUF): tests/wdt_ib700-test.o +tests/tco-test$(EXESUF): tests/tco-test.o $(libqos-pc-obj-y) tests/virtio-balloon-test$(EXESUF): tests/virtio-balloon-test.o tests/virtio-blk-test$(EXESUF): tests/virtio-blk-test.o $(libqos-virtio-obj-y) tests/virtio-net-test$(EXESUF): tests/virtio-net-test.o $(libqos-pc-obj-y) diff --git a/tests/display-vga-test.c b/tests/display-vga-test.c index 17f59101e8..7694344eaf 100644 --- a/tests/display-vga-test.c +++ b/tests/display-vga-test.c @@ -36,6 +36,20 @@ static void pci_multihead(void) qtest_end(); } +static void pci_virtio_gpu(void) +{ + qtest_start("-vga none -device virtio-gpu-pci"); + qtest_end(); +} + +#ifdef CONFIG_VIRTIO_VGA +static void pci_virtio_vga(void) +{ + qtest_start("-vga none -device virtio-vga"); + qtest_end(); +} +#endif + int main(int argc, char **argv) { int ret; @@ -46,6 +60,10 @@ int main(int argc, char **argv) qtest_add_func("/display/pci/stdvga", pci_stdvga); qtest_add_func("/display/pci/secondary", pci_secondary); qtest_add_func("/display/pci/multihead", pci_multihead); + qtest_add_func("/display/pci/virtio-gpu", pci_virtio_gpu); +#ifdef CONFIG_VIRTIO_VGA + qtest_add_func("/display/pci/virtio-vga", pci_virtio_vga); +#endif ret = g_test_run(); return ret; diff --git a/tests/libqos/ahci.c b/tests/libqos/ahci.c index 33ecd2abfb..cf66b3e32c 100644 --- a/tests/libqos/ahci.c +++ b/tests/libqos/ahci.c @@ -545,16 +545,18 @@ void ahci_destroy_command(AHCIQState *ahci, uint8_t port, uint8_t slot) ahci->port[port].prdtl[slot] = 0; } -void ahci_write_fis(AHCIQState *ahci, RegH2DFIS *fis, uint64_t addr) +void ahci_write_fis(AHCIQState *ahci, AHCICommand *cmd) { - RegH2DFIS tmp = *fis; + RegH2DFIS tmp = cmd->fis; + uint64_t addr = cmd->header.ctba; - /* The auxiliary FIS fields are defined per-command and are not - * currently implemented in libqos/ahci.o, but may or may not need - * to be flipped. */ - - /* All other FIS fields are 8 bit and do not need to be flipped. */ - tmp.count = cpu_to_le16(tmp.count); + /* NCQ commands use exclusively 8 bit fields and needs no adjustment. + * Only the count field needs to be adjusted for non-NCQ commands. + * The auxiliary FIS fields are defined per-command and are not currently + * implemented in libqos/ahci.o, but may or may not need to be flipped. */ + if (!cmd->props->ncq) { + tmp.count = cpu_to_le16(tmp.count); + } memwrite(addr, &tmp, sizeof(tmp)); } @@ -877,7 +879,7 @@ void ahci_command_commit(AHCIQState *ahci, AHCICommand *cmd, uint8_t port) /* Commit the command header and command FIS */ ahci_set_command_header(ahci, port, cmd->slot, &(cmd->header)); - ahci_write_fis(ahci, &(cmd->fis), table_ptr); + ahci_write_fis(ahci, cmd); /* Construct and write the PRDs to the command table */ g_assert_cmphex(prdtl, ==, cmd->header.prdtl); diff --git a/tests/libqos/ahci.h b/tests/libqos/ahci.h index a08a9ddac1..cffc2c351c 100644 --- a/tests/libqos/ahci.h +++ b/tests/libqos/ahci.h @@ -548,7 +548,7 @@ void ahci_get_command_header(AHCIQState *ahci, uint8_t port, void ahci_set_command_header(AHCIQState *ahci, uint8_t port, uint8_t slot, AHCICommandHeader *cmd); void ahci_destroy_command(AHCIQState *ahci, uint8_t port, uint8_t slot); -void ahci_write_fis(AHCIQState *ahci, RegH2DFIS *fis, uint64_t addr); +void ahci_write_fis(AHCIQState *ahci, AHCICommand *cmd); unsigned ahci_pick_cmd(AHCIQState *ahci, uint8_t port); unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd); void ahci_guest_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd, diff --git a/tests/rocker/bridge-vlan b/tests/rocker/bridge-vlan index ef9e5f53bb..897d82c5c7 100755 --- a/tests/rocker/bridge-vlan +++ b/tests/rocker/bridge-vlan @@ -20,8 +20,8 @@ simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filte # add both ports to VLAN 57 -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self" -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2" # turn off learning and flooding in SW diff --git a/tests/rocker/bridge-vlan-stp b/tests/rocker/bridge-vlan-stp index c660312bc6..85d2646820 100755 --- a/tests/rocker/bridge-vlan-stp +++ b/tests/rocker/bridge-vlan-stp @@ -21,8 +21,8 @@ simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filte # add both ports to VLAN 57 -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self" -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2" # turn off learning and flooding in SW diff --git a/tests/tco-test.c b/tests/tco-test.c new file mode 100644 index 0000000000..419f7cf46c --- /dev/null +++ b/tests/tco-test.c @@ -0,0 +1,465 @@ +/* + * QEMU ICH9 TCO emulation tests + * + * Copyright (c) 2015 Paulo Alcantara <pcacjr@zytor.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include <glib.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#include "libqtest.h" +#include "libqos/pci.h" +#include "libqos/pci-pc.h" +#include "hw/pci/pci_regs.h" +#include "hw/i386/ich9.h" +#include "hw/acpi/ich9.h" +#include "hw/acpi/tco.h" + +#define RCBA_BASE_ADDR 0xfed1c000 +#define PM_IO_BASE_ADDR 0xb000 + +enum { + TCO_RLD_DEFAULT = 0x0000, + TCO_DAT_IN_DEFAULT = 0x00, + TCO_DAT_OUT_DEFAULT = 0x00, + TCO1_STS_DEFAULT = 0x0000, + TCO2_STS_DEFAULT = 0x0000, + TCO1_CNT_DEFAULT = 0x0000, + TCO2_CNT_DEFAULT = 0x0008, + TCO_MESSAGE1_DEFAULT = 0x00, + TCO_MESSAGE2_DEFAULT = 0x00, + TCO_WDCNT_DEFAULT = 0x00, + TCO_TMR_DEFAULT = 0x0004, + SW_IRQ_GEN_DEFAULT = 0x03, +}; + +#define TCO_SECS_TO_TICKS(secs) (((secs) * 10) / 6) +#define TCO_TICKS_TO_SECS(ticks) (((ticks) * 6) / 10) + +typedef struct { + const char *args; + bool noreboot; + QPCIDevice *dev; + void *tco_io_base; +} TestData; + +static void test_init(TestData *d) +{ + QPCIBus *bus; + QTestState *qs; + char *s; + + s = g_strdup_printf("-machine q35 %s %s", + d->noreboot ? "" : "-global ICH9-LPC.noreboot=false", + !d->args ? "" : d->args); + qs = qtest_start(s); + qtest_irq_intercept_in(qs, "ioapic"); + g_free(s); + + bus = qpci_init_pc(); + d->dev = qpci_device_find(bus, QPCI_DEVFN(0x1f, 0x00)); + g_assert(d->dev != NULL); + + qpci_device_enable(d->dev); + + /* set ACPI PM I/O space base address */ + qpci_config_writel(d->dev, ICH9_LPC_PMBASE, PM_IO_BASE_ADDR | 0x1); + /* enable ACPI I/O */ + qpci_config_writeb(d->dev, ICH9_LPC_ACPI_CTRL, 0x80); + /* set Root Complex BAR */ + qpci_config_writel(d->dev, ICH9_LPC_RCBA, RCBA_BASE_ADDR | 0x1); + + d->tco_io_base = (void *)((uintptr_t)PM_IO_BASE_ADDR + 0x60); +} + +static void stop_tco(const TestData *d) +{ + uint32_t val; + + val = qpci_io_readw(d->dev, d->tco_io_base + TCO1_CNT); + val |= TCO_TMR_HLT; + qpci_io_writew(d->dev, d->tco_io_base + TCO1_CNT, val); +} + +static void start_tco(const TestData *d) +{ + uint32_t val; + + val = qpci_io_readw(d->dev, d->tco_io_base + TCO1_CNT); + val &= ~TCO_TMR_HLT; + qpci_io_writew(d->dev, d->tco_io_base + TCO1_CNT, val); +} + +static void load_tco(const TestData *d) +{ + qpci_io_writew(d->dev, d->tco_io_base + TCO_RLD, 4); +} + +static void set_tco_timeout(const TestData *d, uint16_t ticks) +{ + qpci_io_writew(d->dev, d->tco_io_base + TCO_TMR, ticks); +} + +static void clear_tco_status(const TestData *d) +{ + qpci_io_writew(d->dev, d->tco_io_base + TCO1_STS, 0x0008); + qpci_io_writew(d->dev, d->tco_io_base + TCO2_STS, 0x0002); + qpci_io_writew(d->dev, d->tco_io_base + TCO2_STS, 0x0004); +} + +static void reset_on_second_timeout(bool enable) +{ + uint32_t val; + + val = readl(RCBA_BASE_ADDR + ICH9_CC_GCS); + if (enable) { + val &= ~ICH9_CC_GCS_NO_REBOOT; + } else { + val |= ICH9_CC_GCS_NO_REBOOT; + } + writel(RCBA_BASE_ADDR + ICH9_CC_GCS, val); +} + +static void test_tco_defaults(void) +{ + TestData d; + + d.args = NULL; + d.noreboot = true; + test_init(&d); + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_RLD), ==, + TCO_RLD_DEFAULT); + /* TCO_DAT_IN & TCO_DAT_OUT */ + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_DAT_IN), ==, + (TCO_DAT_OUT_DEFAULT << 8) | TCO_DAT_IN_DEFAULT); + /* TCO1_STS & TCO2_STS */ + g_assert_cmpint(qpci_io_readl(d.dev, d.tco_io_base + TCO1_STS), ==, + (TCO2_STS_DEFAULT << 16) | TCO1_STS_DEFAULT); + /* TCO1_CNT & TCO2_CNT */ + g_assert_cmpint(qpci_io_readl(d.dev, d.tco_io_base + TCO1_CNT), ==, + (TCO2_CNT_DEFAULT << 16) | TCO1_CNT_DEFAULT); + /* TCO_MESSAGE1 & TCO_MESSAGE2 */ + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_MESSAGE1), ==, + (TCO_MESSAGE2_DEFAULT << 8) | TCO_MESSAGE1_DEFAULT); + g_assert_cmpint(qpci_io_readb(d.dev, d.tco_io_base + TCO_WDCNT), ==, + TCO_WDCNT_DEFAULT); + g_assert_cmpint(qpci_io_readb(d.dev, d.tco_io_base + SW_IRQ_GEN), ==, + SW_IRQ_GEN_DEFAULT); + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_TMR), ==, + TCO_TMR_DEFAULT); + qtest_end(); +} + +static void test_tco_timeout(void) +{ + TestData d; + const uint16_t ticks = TCO_SECS_TO_TICKS(4); + uint32_t val; + int ret; + + d.args = NULL; + d.noreboot = true; + test_init(&d); + + stop_tco(&d); + clear_tco_status(&d); + reset_on_second_timeout(false); + set_tco_timeout(&d, ticks); + load_tco(&d); + start_tco(&d); + clock_step(ticks * TCO_TICK_NSEC); + + /* test first timeout */ + val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + ret = val & TCO_TIMEOUT ? 1 : 0; + g_assert(ret == 1); + + /* test clearing timeout bit */ + val |= TCO_TIMEOUT; + qpci_io_writew(d.dev, d.tco_io_base + TCO1_STS, val); + val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + ret = val & TCO_TIMEOUT ? 1 : 0; + g_assert(ret == 0); + + /* test second timeout */ + clock_step(ticks * TCO_TICK_NSEC); + val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + ret = val & TCO_TIMEOUT ? 1 : 0; + g_assert(ret == 1); + val = qpci_io_readw(d.dev, d.tco_io_base + TCO2_STS); + ret = val & TCO_SECOND_TO_STS ? 1 : 0; + g_assert(ret == 1); + + stop_tco(&d); + qtest_end(); +} + +static void test_tco_max_timeout(void) +{ + TestData d; + const uint16_t ticks = 0xffff; + uint32_t val; + int ret; + + d.args = NULL; + d.noreboot = true; + test_init(&d); + + stop_tco(&d); + clear_tco_status(&d); + reset_on_second_timeout(false); + set_tco_timeout(&d, ticks); + load_tco(&d); + start_tco(&d); + clock_step(((ticks & TCO_TMR_MASK) - 1) * TCO_TICK_NSEC); + + val = qpci_io_readw(d.dev, d.tco_io_base + TCO_RLD); + g_assert_cmpint(val & TCO_RLD_MASK, ==, 1); + val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + ret = val & TCO_TIMEOUT ? 1 : 0; + g_assert(ret == 0); + clock_step(TCO_TICK_NSEC); + val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + ret = val & TCO_TIMEOUT ? 1 : 0; + g_assert(ret == 1); + + stop_tco(&d); + qtest_end(); +} + +static QDict *get_watchdog_action(void) +{ + QDict *ev = qmp(""); + QDict *data; + g_assert(!strcmp(qdict_get_str(ev, "event"), "WATCHDOG")); + + data = qdict_get_qdict(ev, "data"); + QINCREF(data); + QDECREF(ev); + return data; +} + +static void test_tco_second_timeout_pause(void) +{ + TestData td; + const uint16_t ticks = TCO_SECS_TO_TICKS(32); + QDict *ad; + + td.args = "-watchdog-action pause"; + td.noreboot = false; + test_init(&td); + + stop_tco(&td); + clear_tco_status(&td); + reset_on_second_timeout(true); + set_tco_timeout(&td, TCO_SECS_TO_TICKS(16)); + load_tco(&td); + start_tco(&td); + clock_step(ticks * TCO_TICK_NSEC * 2); + ad = get_watchdog_action(); + g_assert(!strcmp(qdict_get_str(ad, "action"), "pause")); + QDECREF(ad); + + stop_tco(&td); + qtest_end(); +} + +static void test_tco_second_timeout_reset(void) +{ + TestData td; + const uint16_t ticks = TCO_SECS_TO_TICKS(16); + QDict *ad; + + td.args = "-watchdog-action reset"; + td.noreboot = false; + test_init(&td); + + stop_tco(&td); + clear_tco_status(&td); + reset_on_second_timeout(true); + set_tco_timeout(&td, TCO_SECS_TO_TICKS(16)); + load_tco(&td); + start_tco(&td); + clock_step(ticks * TCO_TICK_NSEC * 2); + ad = get_watchdog_action(); + g_assert(!strcmp(qdict_get_str(ad, "action"), "reset")); + QDECREF(ad); + + stop_tco(&td); + qtest_end(); +} + +static void test_tco_second_timeout_shutdown(void) +{ + TestData td; + const uint16_t ticks = TCO_SECS_TO_TICKS(128); + QDict *ad; + + td.args = "-watchdog-action shutdown"; + td.noreboot = false; + test_init(&td); + + stop_tco(&td); + clear_tco_status(&td); + reset_on_second_timeout(true); + set_tco_timeout(&td, ticks); + load_tco(&td); + start_tco(&td); + clock_step(ticks * TCO_TICK_NSEC * 2); + ad = get_watchdog_action(); + g_assert(!strcmp(qdict_get_str(ad, "action"), "shutdown")); + QDECREF(ad); + + stop_tco(&td); + qtest_end(); +} + +static void test_tco_second_timeout_none(void) +{ + TestData td; + const uint16_t ticks = TCO_SECS_TO_TICKS(256); + QDict *ad; + + td.args = "-watchdog-action none"; + td.noreboot = false; + test_init(&td); + + stop_tco(&td); + clear_tco_status(&td); + reset_on_second_timeout(true); + set_tco_timeout(&td, ticks); + load_tco(&td); + start_tco(&td); + clock_step(ticks * TCO_TICK_NSEC * 2); + ad = get_watchdog_action(); + g_assert(!strcmp(qdict_get_str(ad, "action"), "none")); + QDECREF(ad); + + stop_tco(&td); + qtest_end(); +} + +static void test_tco_ticks_counter(void) +{ + TestData d; + uint16_t ticks = TCO_SECS_TO_TICKS(8); + uint16_t rld; + + d.args = NULL; + d.noreboot = true; + test_init(&d); + + stop_tco(&d); + clear_tco_status(&d); + reset_on_second_timeout(false); + set_tco_timeout(&d, ticks); + load_tco(&d); + start_tco(&d); + + do { + rld = qpci_io_readw(d.dev, d.tco_io_base + TCO_RLD) & TCO_RLD_MASK; + g_assert_cmpint(rld, ==, ticks); + clock_step(TCO_TICK_NSEC); + ticks--; + } while (!(qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS) & TCO_TIMEOUT)); + + stop_tco(&d); + qtest_end(); +} + +static void test_tco1_control_bits(void) +{ + TestData d; + uint16_t val; + + d.args = NULL; + d.noreboot = true; + test_init(&d); + + val = TCO_LOCK; + qpci_io_writew(d.dev, d.tco_io_base + TCO1_CNT, val); + val &= ~TCO_LOCK; + qpci_io_writew(d.dev, d.tco_io_base + TCO1_CNT, val); + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO1_CNT), ==, + TCO_LOCK); + qtest_end(); +} + +static void test_tco1_status_bits(void) +{ + TestData d; + uint16_t ticks = 8; + uint16_t val; + int ret; + + d.args = NULL; + d.noreboot = true; + test_init(&d); + + stop_tco(&d); + clear_tco_status(&d); + reset_on_second_timeout(false); + set_tco_timeout(&d, ticks); + load_tco(&d); + start_tco(&d); + clock_step(ticks * TCO_TICK_NSEC); + + qpci_io_writeb(d.dev, d.tco_io_base + TCO_DAT_IN, 0); + qpci_io_writeb(d.dev, d.tco_io_base + TCO_DAT_OUT, 0); + val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + ret = val & (TCO_TIMEOUT | SW_TCO_SMI | TCO_INT_STS) ? 1 : 0; + g_assert(ret == 1); + qpci_io_writew(d.dev, d.tco_io_base + TCO1_STS, val); + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS), ==, 0); + qtest_end(); +} + +static void test_tco2_status_bits(void) +{ + TestData d; + uint16_t ticks = 8; + uint16_t val; + int ret; + + d.args = NULL; + d.noreboot = true; + test_init(&d); + + stop_tco(&d); + clear_tco_status(&d); + reset_on_second_timeout(true); + set_tco_timeout(&d, ticks); + load_tco(&d); + start_tco(&d); + clock_step(ticks * TCO_TICK_NSEC * 2); + + val = qpci_io_readw(d.dev, d.tco_io_base + TCO2_STS); + ret = val & (TCO_SECOND_TO_STS | TCO_BOOT_STS) ? 1 : 0; + g_assert(ret == 1); + qpci_io_writew(d.dev, d.tco_io_base + TCO2_STS, val); + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO2_STS), ==, 0); + qtest_end(); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + qtest_add_func("tco/defaults", test_tco_defaults); + qtest_add_func("tco/timeout/no_action", test_tco_timeout); + qtest_add_func("tco/timeout/no_action/max", test_tco_max_timeout); + qtest_add_func("tco/second_timeout/pause", test_tco_second_timeout_pause); + qtest_add_func("tco/second_timeout/reset", test_tco_second_timeout_reset); + qtest_add_func("tco/second_timeout/shutdown", + test_tco_second_timeout_shutdown); + qtest_add_func("tco/second_timeout/none", test_tco_second_timeout_none); + qtest_add_func("tco/counter", test_tco_ticks_counter); + qtest_add_func("tco/tco1_control/bits", test_tco1_control_bits); + qtest_add_func("tco/tco1_status/bits", test_tco1_status_bits); + qtest_add_func("tco/tco2_status/bits", test_tco2_status_bits); + return g_test_run(); +} diff --git a/trace-events b/trace-events index 52b7efa9a4..2d395c59a9 100644 --- a/trace-events +++ b/trace-events @@ -1191,6 +1191,7 @@ qemu_loadvm_state_section_partend(uint32_t section_id) "%u" qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u" savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d" +savevm_section_skip(const char *id, unsigned int section_id) "%s, section_id %u" savevm_state_begin(void) "" savevm_state_header(void) "" savevm_state_iterate(void) "" @@ -1403,10 +1404,13 @@ migrate_fd_error(void) "" migrate_fd_cancel(void) "" migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " max %" PRIu64 migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64 +migrate_state_too_big(void) "" +migrate_global_state_post_load(const char *state) "loaded state: %s" +migrate_global_state_pre_save(const char *state) "saved state: %s" # migration/rdma.c -qemu_dma_accept_incoming_migration(void) "" -qemu_dma_accept_incoming_migration_accepted(void) "" +qemu_rdma_accept_incoming_migration(void) "" +qemu_rdma_accept_incoming_migration_accepted(void) "" qemu_rdma_accept_pin_state(bool pin) "%d" qemu_rdma_accept_pin_verbsc(void *verbs) "Verbs context after listen: %p" qemu_rdma_block_for_wrid_miss(const char *wcompstr, int wcomp, const char *gcompstr, uint64_t req) "A Wanted wrid %s (%d) but got %s (%" PRIu64 ")" @@ -1433,13 +1437,14 @@ qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering %" PRIu6 qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64 qemu_rdma_registration_handle_finished(void) "" qemu_rdma_registration_handle_ram_blocks(void) "" +qemu_rdma_registration_handle_ram_blocks_loop(const char *name, uint64_t offset, uint64_t length, void *local_host_addr, unsigned int src_index) "%s: @%" PRIx64 "/%" PRIu64 " host:@%p src_index: %u" qemu_rdma_registration_handle_register(int requests) "%d requests" qemu_rdma_registration_handle_register_loop(int req, int index, uint64_t addr, uint64_t chunks) "Registration request (%d): index %d, current_addr %" PRIu64 " chunks: %" PRIu64 qemu_rdma_registration_handle_register_rkey(int rkey) "%x" qemu_rdma_registration_handle_unregister(int requests) "%d requests" qemu_rdma_registration_handle_unregister_loop(int count, int index, uint64_t chunk) "Unregistration request (%d): index %d, chunk %" PRIu64 qemu_rdma_registration_handle_unregister_success(uint64_t chunk) "%" PRIu64 -qemu_rdma_registration_handle_wait(uint64_t flags) "Waiting for next request %" PRIu64 +qemu_rdma_registration_handle_wait(void) "" qemu_rdma_registration_start(uint64_t flags) "%" PRIu64 qemu_rdma_registration_stop(uint64_t flags) "%" PRIu64 qemu_rdma_registration_stop_ram(void) "" @@ -1458,8 +1463,9 @@ qemu_rdma_write_one_recvregres(int mykey, int theirkey, uint64_t chunk) "Receive qemu_rdma_write_one_sendreg(uint64_t chunk, int len, int index, int64_t offset) "Sending registration request chunk %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64 qemu_rdma_write_one_top(uint64_t chunks, uint64_t size) "Writing %" PRIu64 " chunks, (%" PRIu64 " MB)" qemu_rdma_write_one_zero(uint64_t chunk, int len, int index, int64_t offset) "Entire chunk is zero, sending compress: %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64 -rdma_add_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d" -rdma_delete_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d" +rdma_add_block(const char *block_name, int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: '%s':%d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d" +rdma_block_notification_handle(const char *name, int index) "%s at %d" +rdma_delete_block(void *block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %p, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d" rdma_start_incoming_migration(void) "" rdma_start_incoming_migration_after_dest_init(void) "" rdma_start_incoming_migration_after_rdma_listen(void) "" @@ -1594,6 +1600,7 @@ vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)" vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)" vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index %d: count %d, flags=0x%x" vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING" +vfio_platform_start_irqfd_injection(int index, int fd, int resamplefd) "IRQ index=%d, fd = %d, resamplefd = %d" #hw/acpi/memory_hotplug.c mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32 diff --git a/translate-all.c b/translate-all.c index 412bc9005f..50d53fdac0 100644 --- a/translate-all.c +++ b/translate-all.c @@ -118,6 +118,7 @@ typedef struct PageDesc { #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS) uintptr_t qemu_real_host_page_size; +uintptr_t qemu_real_host_page_mask; uintptr_t qemu_host_page_size; uintptr_t qemu_host_page_mask; @@ -307,6 +308,7 @@ void page_size_init(void) /* NOTE: we can always suppose that qemu_host_page_size >= TARGET_PAGE_SIZE */ qemu_real_host_page_size = getpagesize(); + qemu_real_host_page_mask = ~(qemu_real_host_page_size - 1); if (qemu_host_page_size == 0) { qemu_host_page_size = qemu_real_host_page_size; } @@ -574,8 +574,14 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_DEBUG, RUN_STATE_RUNNING }, { RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING }, + { RUN_STATE_INMIGRATE, RUN_STATE_INTERNAL_ERROR }, + { RUN_STATE_INMIGRATE, RUN_STATE_IO_ERROR }, { RUN_STATE_INMIGRATE, RUN_STATE_PAUSED }, + { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING }, + { RUN_STATE_INMIGRATE, RUN_STATE_SHUTDOWN }, + { RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED }, + { RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG }, + { RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE }, @@ -635,6 +641,18 @@ bool runstate_check(RunState state) return current_run_state == state; } +bool runstate_store(char *str, size_t size) +{ + const char *state = RunState_lookup[current_run_state]; + size_t len = strlen(state) + 1; + + if (len > size) { + return false; + } + memcpy(str, state, len); + return true; +} + static void runstate_init(void) { const RunStateTransition *p; @@ -4610,6 +4628,7 @@ int main(int argc, char **argv, char **envp) return 0; } + register_global_state(); if (incoming) { Error *local_err = NULL; qemu_start_incoming_migration(incoming, &local_err); |