diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2017-07-17 12:52:59 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2017-07-17 12:52:59 +0100 |
commit | 77031ee1ce4c7e5f4b4941535c4912eb2eb3503c (patch) | |
tree | c70b354b3cdb137d9b4bb0d4d395b3a49022b067 /hw | |
parent | 6632f6ff96f0537fc34cdc00c760656fc62e23c5 (diff) | |
parent | 346ebfc6fbf1a0c07cbdfc52beef0f0b9b5180d4 (diff) |
Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.10-20170717' into staging
ppc patch queue 2017-07-17
This pull requests supersedes the one from 2017-07-14. That one had a
couple of subtle regressions: there was a build error for mingw32, and
an instance_size which was theoretically wrong everywhere, but only
actually bit on the Travis OSX build.
There are two major batches in this set, rather than the usual
collection of assorted fixes.
* More DRC cleanup. This gets the state management into a state
which should fix many of the hotplug+migration problems we've
had. Plus it gets the migration stream format into something
well defined and pretty minimal which we can reasonably support
into the future.
* Hashed Page Table resizing. It's been a while since this was
posted, but it's been through several previous rounds of review.
The kernel parts (both guest and host) are merged in 4.11, so
this is the only remaining piece left to allow resizing of the
HPT in a running guest.
There are also a handful of unrelated fixes.
# gpg: Signature made Mon 17 Jul 2017 07:36:52 BST
# gpg: using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392
* remotes/dgibson/tags/ppc-for-2.10-20170717: (21 commits)
target/ppc: fix CPU hotplug when radix is enabled (TCG)
spapr: fix memory leak in spapr_core_pre_plug()
pseries: Allow HPT resizing with KVM
pseries: Use smaller default hash page tables when guest can resize
pseries: Enable HPT resizing for 2.10
pseries: Implement HPT resizing
pseries: Stubs for HPT resizing
ppc/pnv: Remove unused XICSState reference
spapr: fix potential memory leak in spapr_core_plug()
spapr: Implement DR-indicator for physical DRCs only
spapr: Remove sPAPRConfigureConnectorState sub-structure
spapr: Consolidate DRC state variables
spapr: Cleanups relating to DRC awaiting_release field
spapr: Refactor spapr_drc_detach()
spapr: Abort on delete failure in spapr_drc_release()
spapr: Simplify unplug path
spapr: Remove 'awaiting_allocation' DRC flag
spapr: Treat devices added before inbound migration as coldplugged
spapr: Minor cleanups to events handling
spapr: migrate pending_events of spapr state
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/ppc/spapr.c | 239 | ||||
-rw-r--r-- | hw/ppc/spapr_drc.c | 405 | ||||
-rw-r--r-- | hw/ppc/spapr_events.c | 98 | ||||
-rw-r--r-- | hw/ppc/spapr_hcall.c | 439 | ||||
-rw-r--r-- | hw/ppc/spapr_pci.c | 17 | ||||
-rw-r--r-- | hw/ppc/trace-events | 5 |
6 files changed, 879 insertions, 324 deletions
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 16638ce80c..970093e6b5 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -98,8 +98,6 @@ #define PHANDLE_XICP 0x00001111 -#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) - static ICSState *spapr_ics_create(sPAPRMachineState *spapr, const char *type_ics, int nr_irqs, Error **errp) @@ -874,6 +872,11 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { add_str(hypertas, "hcall-multi-tce"); } + + if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) { + add_str(hypertas, "hcall-hpt-resize"); + } + _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions", hypertas->str, hypertas->len)); g_string_free(hypertas, TRUE); @@ -1264,7 +1267,7 @@ static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex, } } -static int spapr_hpt_shift_for_ramsize(uint64_t ramsize) +int spapr_hpt_shift_for_ramsize(uint64_t ramsize) { int shift; @@ -1285,8 +1288,8 @@ void spapr_free_hpt(sPAPRMachineState *spapr) close_htab_fd(spapr); } -static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, - Error **errp) +void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, + Error **errp) { long rc; @@ -1334,9 +1337,17 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr) { - spapr_reallocate_hpt(spapr, - spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size), - &error_fatal); + int hpt_shift; + + if ((spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) + || (spapr->cas_reboot + && !spapr_ovec_test(spapr->ov5_cas, OV5_HPT_RESIZE))) { + hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size); + } else { + hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->ram_size); + } + spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal); + if (spapr->vrma_adjust) { spapr->rma_size = kvmppc_rma_size(spapr_node0_size(), spapr->htab_shift); @@ -1517,6 +1528,37 @@ static bool version_before_3(void *opaque, int version_id) return version_id < 3; } +static bool spapr_pending_events_needed(void *opaque) +{ + sPAPRMachineState *spapr = (sPAPRMachineState *)opaque; + return !QTAILQ_EMPTY(&spapr->pending_events); +} + +static const VMStateDescription vmstate_spapr_event_entry = { + .name = "spapr_event_log_entry", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(summary, sPAPREventLogEntry), + VMSTATE_UINT32(extended_length, sPAPREventLogEntry), + VMSTATE_VBUFFER_ALLOC_UINT32(extended_log, sPAPREventLogEntry, 0, + NULL, extended_length), + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_spapr_pending_events = { + .name = "spapr_pending_events", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_pending_events_needed, + .fields = (VMStateField[]) { + VMSTATE_QTAILQ_V(pending_events, sPAPRMachineState, 1, + vmstate_spapr_event_entry, sPAPREventLogEntry, next), + VMSTATE_END_OF_LIST() + }, +}; + static bool spapr_ov5_cas_needed(void *opaque) { sPAPRMachineState *spapr = opaque; @@ -1615,6 +1657,7 @@ static const VMStateDescription vmstate_spapr = { .subsections = (const VMStateDescription*[]) { &vmstate_spapr_ov5_cas, &vmstate_spapr_patb_entry, + &vmstate_spapr_pending_events, NULL } }; @@ -2116,12 +2159,41 @@ static void ppc_spapr_init(MachineState *machine) hwaddr node0_size = spapr_node0_size(); long load_limit, fw_size; char *filename; + Error *resize_hpt_err = NULL; msi_nonbroken = true; QLIST_INIT(&spapr->phbs); QTAILQ_INIT(&spapr->pending_dimm_unplugs); + /* Check HPT resizing availability */ + kvmppc_check_papr_resize_hpt(&resize_hpt_err); + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) { + /* + * If the user explicitly requested a mode we should either + * supply it, or fail completely (which we do below). But if + * it's not set explicitly, we reset our mode to something + * that works + */ + if (resize_hpt_err) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED; + error_free(resize_hpt_err); + resize_hpt_err = NULL; + } else { + spapr->resize_hpt = smc->resize_hpt_default; + } + } + + assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT); + + if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) { + /* + * User requested HPT resize, but this host can't supply it. Bail out + */ + error_report_err(resize_hpt_err); + exit(1); + } + /* Allocate RMA if necessary */ rma_alloc_size = kvmppc_alloc_rma(&rma); @@ -2190,6 +2262,11 @@ static void ppc_spapr_init(MachineState *machine) spapr_ovec_set(spapr->ov5, OV5_HP_EVT); } + /* advertise support for HPT resizing */ + if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) { + spapr_ovec_set(spapr->ov5, OV5_HPT_RESIZE); + } + /* init CPUs */ if (machine->cpu_model == NULL) { machine->cpu_model = kvm_enabled() ? "host" : smc->tcg_default_cpu; @@ -2547,6 +2624,40 @@ static void spapr_set_modern_hotplug_events(Object *obj, bool value, spapr->use_hotplug_event_source = value; } +static char *spapr_get_resize_hpt(Object *obj, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + switch (spapr->resize_hpt) { + case SPAPR_RESIZE_HPT_DEFAULT: + return g_strdup("default"); + case SPAPR_RESIZE_HPT_DISABLED: + return g_strdup("disabled"); + case SPAPR_RESIZE_HPT_ENABLED: + return g_strdup("enabled"); + case SPAPR_RESIZE_HPT_REQUIRED: + return g_strdup("required"); + } + g_assert_not_reached(); +} + +static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + if (strcmp(value, "default") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT; + } else if (strcmp(value, "disabled") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED; + } else if (strcmp(value, "enabled") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED; + } else if (strcmp(value, "required") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED; + } else { + error_setg(errp, "Bad value for \"resize-hpt\" property"); + } +} + static void spapr_machine_initfn(Object *obj) { sPAPRMachineState *spapr = SPAPR_MACHINE(obj); @@ -2571,6 +2682,12 @@ static void spapr_machine_initfn(Object *obj) ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr, "Maximum permitted CPU compatibility mode", &error_fatal); + + object_property_add_str(obj, "resize-hpt", + spapr_get_resize_hpt, spapr_set_resize_hpt, NULL); + object_property_set_description(obj, "resize-hpt", + "Resizing of the Hash Page Table (enabled, disabled, required)", + NULL); } static void spapr_machine_finalizefn(Object *obj) @@ -2604,6 +2721,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, int i, fdt_offset, fdt_size; void *fdt; uint64_t addr = addr_start; + bool hotplugged = spapr_drc_hotplugged(dev); Error *local_err = NULL; for (i = 0; i < nr_lmbs; i++) { @@ -2621,18 +2739,21 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, addr -= SPAPR_MEMORY_BLOCK_SIZE; drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, addr / SPAPR_MEMORY_BLOCK_SIZE); - spapr_drc_detach(drc, dev, NULL); + spapr_drc_detach(drc); } g_free(fdt); error_propagate(errp, local_err); return; } + if (!hotplugged) { + spapr_drc_reset(drc); + } addr += SPAPR_MEMORY_BLOCK_SIZE; } /* send hotplug notification to the * guest only in case of hotplugged memory */ - if (dev->hotplugged) { + if (hotplugged) { if (dedicated_hp_event_source) { drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, addr_start / SPAPR_MEMORY_BLOCK_SIZE); @@ -2780,8 +2901,10 @@ static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms, /* Callback to be called during DRC release. */ void spapr_lmb_release(DeviceState *dev) { - HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); - sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl); + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_hotplug_handler(dev)); + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); /* This information will get lost if a migration occurs @@ -2802,18 +2925,7 @@ void spapr_lmb_release(DeviceState *dev) * Now that all the LMBs have been removed by the guest, call the * pc-dimm unplug handler to cleanup up the pc-dimm device. */ - hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); -} - -static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) -{ - sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); - PCDIMMDevice *dimm = PC_DIMM(dev); - PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *mr = ddc->get_memory_region(dimm); - - pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); + pc_dimm_memory_unplug(dev, &spapr->hotplug_memory, mr); object_unparent(OBJECT(dev)); } @@ -2849,7 +2961,7 @@ static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, addr / SPAPR_MEMORY_BLOCK_SIZE); g_assert(drc); - spapr_drc_detach(drc, dev, errp); + spapr_drc_detach(drc); addr += SPAPR_MEMORY_BLOCK_SIZE; } @@ -2882,10 +2994,10 @@ static void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset, return fdt; } -static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) +/* Callback to be called during DRC release. */ +void spapr_core_release(DeviceState *dev) { - MachineState *ms = MACHINE(qdev_get_machine()); + MachineState *ms = MACHINE(qdev_get_hotplug_handler(dev)); sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms); CPUCore *cc = CPU_CORE(dev); CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL); @@ -2909,22 +3021,12 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, object_unparent(OBJECT(dev)); } -/* Callback to be called during DRC release. */ -void spapr_core_release(DeviceState *dev) -{ - HotplugHandler *hotplug_ctrl; - - hotplug_ctrl = qdev_get_hotplug_handler(dev); - hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); -} - static void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { int index; sPAPRDRConnector *drc; - Error *local_err = NULL; CPUCore *cc = CPU_CORE(dev); int smt = kvmppc_smt_threads(); @@ -2941,11 +3043,7 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index * smt); g_assert(drc); - spapr_drc_detach(drc, dev, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + spapr_drc_detach(drc); spapr_hotplug_req_remove_by_index(drc); } @@ -2961,11 +3059,10 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, CPUState *cs = CPU(core->threads); sPAPRDRConnector *drc; Error *local_err = NULL; - void *fdt = NULL; - int fdt_offset = 0; int smt = kvmppc_smt_threads(); CPUArchId *core_slot; int index; + bool hotplugged = spapr_drc_hotplugged(dev); core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); if (!core_slot) { @@ -2977,24 +3074,30 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, g_assert(drc || !mc->has_hotpluggable_cpus); - fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr); - if (drc) { + void *fdt; + int fdt_offset; + + fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr); + spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err); if (local_err) { g_free(fdt); error_propagate(errp, local_err); return; } - } - if (dev->hotplugged) { - /* - * Send hotplug notification interrupt to the guest only in case - * of hotplugged CPUs. - */ - spapr_hotplug_req_add_by_index(drc); + if (hotplugged) { + /* + * Send hotplug notification interrupt to the guest only + * in case of hotplugged CPUs. + */ + spapr_hotplug_req_add_by_index(drc); + } else { + spapr_drc_reset(drc); + } } + core_slot->cpu = OBJECT(dev); if (smc->pre_2_10_has_unused_icps) { @@ -3047,9 +3150,9 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, * total vcpus not a multiple of threads-per-core. */ if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) { - error_setg(errp, "invalid nr-threads %d, must be %d", + error_setg(&local_err, "invalid nr-threads %d, must be %d", cc->nr_threads, smp_threads); - return; + goto out; } core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); @@ -3119,27 +3222,6 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, } } -static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - sPAPRMachineState *sms = SPAPR_MACHINE(qdev_get_machine()); - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); - - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { - spapr_memory_unplug(hotplug_dev, dev, errp); - } else { - error_setg(errp, "Memory hot unplug not supported for this guest"); - } - } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { - if (!mc->has_hotpluggable_cpus) { - error_setg(errp, "CPU hot unplug not supported on this machine"); - return; - } - spapr_core_unplug(hotplug_dev, dev, errp); - } -} - static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -3357,7 +3439,6 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->get_hotplug_handler = spapr_get_hotplug_handler; hc->pre_plug = spapr_machine_device_pre_plug; hc->plug = spapr_machine_device_plug; - hc->unplug = spapr_machine_device_unplug; mc->cpu_index_to_instance_props = spapr_cpu_index_to_props; mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids; hc->unplug_request = spapr_machine_device_unplug_request; @@ -3365,6 +3446,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->dr_lmb_enabled = true; smc->tcg_default_cpu = "POWER8"; mc->has_hotpluggable_cpus = true; + smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED; fwc->get_dev_path = spapr_get_fw_dev_path; nc->nmi_monitor_handler = spapr_nmi; smc->phb_placement = spapr_phb_placement; @@ -3471,6 +3553,7 @@ static void spapr_machine_2_9_class_options(MachineClass *mc) SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9); mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; smc->pre_2_10_has_unused_icps = true; + smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; } DEFINE_SPAPR_MACHINE(2_9, "2.9", false); diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index f34355dad1..0ffcec6fb2 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -48,40 +48,40 @@ uint32_t spapr_drc_index(sPAPRDRConnector *drc) static uint32_t drc_isolate_physical(sPAPRDRConnector *drc) { - /* if the guest is configuring a device attached to this DRC, we - * should reset the configuration state at this point since it may - * no longer be reliable (guest released device and needs to start - * over, or unplug occurred so the FDT is no longer valid) - */ - g_free(drc->ccs); - drc->ccs = NULL; + switch (drc->state) { + case SPAPR_DRC_STATE_PHYSICAL_POWERON: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED: + break; /* see below */ + case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE: + return RTAS_OUT_PARAM_ERROR; /* not allowed */ + default: + g_assert_not_reached(); + } - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; + drc->state = SPAPR_DRC_STATE_PHYSICAL_POWERON; - /* if we're awaiting release, but still in an unconfigured state, - * it's likely the guest is still in the process of configuring - * the device and is transitioning the devices to an ISOLATED - * state as a part of that process. so we only complete the - * removal when this transition happens for a device in a - * configured state, as suggested by the state diagram from PAPR+ - * 2.7, 13.4 - */ - if (drc->awaiting_release) { + if (drc->unplug_requested) { uint32_t drc_index = spapr_drc_index(drc); - if (drc->configured) { - trace_spapr_drc_set_isolation_state_finalizing(drc_index); - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); - } else { - trace_spapr_drc_set_isolation_state_deferring(drc_index); - } + trace_spapr_drc_set_isolation_state_finalizing(drc_index); + spapr_drc_detach(drc); } - drc->configured = false; return RTAS_OUT_SUCCESS; } static uint32_t drc_unisolate_physical(sPAPRDRConnector *drc) { + switch (drc->state) { + case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE: + case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_PHYSICAL_POWERON: + break; /* see below */ + default: + g_assert_not_reached(); + } + /* cannot unisolate a non-existent resource, and, or resources * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, * 13.5.3.5) @@ -90,20 +90,26 @@ static uint32_t drc_unisolate_physical(sPAPRDRConnector *drc) return RTAS_OUT_NO_SUCH_INDICATOR; } - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; + drc->state = SPAPR_DRC_STATE_PHYSICAL_UNISOLATE; + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; return RTAS_OUT_SUCCESS; } static uint32_t drc_isolate_logical(sPAPRDRConnector *drc) { - /* if the guest is configuring a device attached to this DRC, we - * should reset the configuration state at this point since it may - * no longer be reliable (guest released device and needs to start - * over, or unplug occurred so the FDT is no longer valid) - */ - g_free(drc->ccs); - drc->ccs = NULL; + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + return RTAS_OUT_PARAM_ERROR; /* not allowed */ + default: + g_assert_not_reached(); + } /* * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't @@ -116,11 +122,11 @@ static uint32_t drc_isolate_logical(sPAPRDRConnector *drc) * actually being unplugged, fail the isolation request here. */ if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB - && !drc->awaiting_release) { + && !drc->unplug_requested) { return RTAS_OUT_HW_ERROR; } - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; + drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE; /* if we're awaiting release, but still in an unconfigured state, * it's likely the guest is still in the process of configuring @@ -130,38 +136,51 @@ static uint32_t drc_isolate_logical(sPAPRDRConnector *drc) * configured state, as suggested by the state diagram from PAPR+ * 2.7, 13.4 */ - if (drc->awaiting_release) { + if (drc->unplug_requested) { uint32_t drc_index = spapr_drc_index(drc); - if (drc->configured) { - trace_spapr_drc_set_isolation_state_finalizing(drc_index); - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); - } else { - trace_spapr_drc_set_isolation_state_deferring(drc_index); - } + trace_spapr_drc_set_isolation_state_finalizing(drc_index); + spapr_drc_detach(drc); } - drc->configured = false; - return RTAS_OUT_SUCCESS; } static uint32_t drc_unisolate_logical(sPAPRDRConnector *drc) { - /* cannot unisolate a non-existent resource, and, or resources - * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, - * 13.5.3.5) - */ - if (!drc->dev || - drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - return RTAS_OUT_NO_SUCH_INDICATOR; + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */ + default: + g_assert_not_reached(); } - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; + /* Move to AVAILABLE state should have ensured device was present */ + g_assert(drc->dev); + + drc->state = SPAPR_DRC_STATE_LOGICAL_UNISOLATE; + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; return RTAS_OUT_SUCCESS; } static uint32_t drc_set_usable(sPAPRDRConnector *drc) { + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + break; /* see below */ + default: + g_assert_not_reached(); + } + /* if there's no resource/device associated with the DRC, there's * no way for us to put it in an allocation state consistent with * being 'USABLE'. PAPR 2.7, 13.5.3.4 documents that this should @@ -170,30 +189,36 @@ static uint32_t drc_set_usable(sPAPRDRConnector *drc) if (!drc->dev) { return RTAS_OUT_NO_SUCH_INDICATOR; } - if (drc->awaiting_release && drc->awaiting_allocation) { - /* kernel is acknowledging a previous hotplug event - * while we are already removing it. - * it's safe to ignore awaiting_allocation here since we know the - * situation is predicated on the guest either already having done - * so (boot-time hotplug), or never being able to acquire in the - * first place (hotplug followed by immediate unplug). - */ + if (drc->unplug_requested) { + /* Don't allow the guest to move a device away from UNUSABLE + * state when we want to unplug it */ return RTAS_OUT_NO_SUCH_INDICATOR; } - drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE; - drc->awaiting_allocation = false; + drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE; return RTAS_OUT_SUCCESS; } static uint32_t drc_set_unusable(sPAPRDRConnector *drc) { - drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE; - if (drc->awaiting_release) { + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */ + default: + g_assert_not_reached(); + } + + drc->state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE; + if (drc->unplug_requested) { uint32_t drc_index = spapr_drc_index(drc); trace_spapr_drc_set_allocation_state_finalizing(drc_index); - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); + spapr_drc_detach(drc); } return RTAS_OUT_SUCCESS; @@ -247,11 +272,16 @@ static sPAPRDREntitySense physical_entity_sense(sPAPRDRConnector *drc) static sPAPRDREntitySense logical_entity_sense(sPAPRDRConnector *drc) { - if (drc->dev - && (drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE)) { - return SPAPR_DR_ENTITY_SENSE_PRESENT; - } else { + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: return SPAPR_DR_ENTITY_SENSE_UNUSABLE; + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + g_assert(drc->dev); + return SPAPR_DR_ENTITY_SENSE_PRESENT; + default: + g_assert_not_reached(); } } @@ -344,23 +374,18 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, { trace_spapr_drc_attach(spapr_drc_index(drc)); - if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { + if (drc->dev) { error_setg(errp, "an attached device is still awaiting release"); return; } - if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) { - g_assert(drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE); - } + g_assert((drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE) + || (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON)); g_assert(fdt); drc->dev = d; drc->fdt = fdt; drc->fdt_start_offset = fdt_start_offset; - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->awaiting_allocation = true; - } - object_property_add_link(OBJECT(drc), "device", object_get_typename(OBJECT(drc->dev)), (Object **)(&drc->dev), @@ -373,85 +398,65 @@ static void spapr_drc_release(sPAPRDRConnector *drc) drck->release(drc->dev); - drc->awaiting_release = false; + drc->unplug_requested = false; g_free(drc->fdt); drc->fdt = NULL; drc->fdt_start_offset = 0; - object_property_del(OBJECT(drc), "device", NULL); + object_property_del(OBJECT(drc), "device", &error_abort); drc->dev = NULL; } -void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) +void spapr_drc_detach(sPAPRDRConnector *drc) { + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + trace_spapr_drc_detach(spapr_drc_index(drc)); - if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { - trace_spapr_drc_awaiting_isolated(spapr_drc_index(drc)); - drc->awaiting_release = true; - return; - } + g_assert(drc->dev); - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI && - drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - trace_spapr_drc_awaiting_unusable(spapr_drc_index(drc)); - drc->awaiting_release = true; - return; - } + drc->unplug_requested = true; - if (drc->awaiting_allocation) { - drc->awaiting_release = true; - trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc)); + if (drc->state != drck->empty_state) { + trace_spapr_drc_awaiting_quiesce(spapr_drc_index(drc)); return; } spapr_drc_release(drc); } -static bool release_pending(sPAPRDRConnector *drc) +void spapr_drc_reset(sPAPRDRConnector *drc) { - return drc->awaiting_release; -} - -static void drc_reset(void *opaque) -{ - sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(opaque); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); trace_spapr_drc_reset(spapr_drc_index(drc)); - g_free(drc->ccs); - drc->ccs = NULL; - /* immediately upon reset we can safely assume DRCs whose devices * are pending removal can be safely removed. */ - if (drc->awaiting_release) { + if (drc->unplug_requested) { spapr_drc_release(drc); } - drc->awaiting_allocation = false; - if (drc->dev) { - /* A device present at reset is coldplugged */ - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE; - } - drc->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE; + /* A device present at reset is ready to go, same as coldplugged */ + drc->state = drck->ready_state; } else { - /* Otherwise device is absent, but might be hotplugged */ - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE; - } - drc->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE; + drc->state = drck->empty_state; } + + drc->ccs_offset = -1; + drc->ccs_depth = -1; +} + +static void drc_reset(void *opaque) +{ + spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque)); } static bool spapr_drc_needed(void *opaque) { sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque; sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - bool rc = false; sPAPRDREntitySense value = drck->dr_entity_sense(drc); /* If no dev is plugged in there is no need to migrate the DRC state */ @@ -460,23 +465,10 @@ static bool spapr_drc_needed(void *opaque) } /* - * If there is dev plugged in, we need to migrate the DRC state when - * it is different from cold-plugged state - */ - switch (spapr_drc_type(drc)) { - case SPAPR_DR_CONNECTOR_TYPE_PCI: - case SPAPR_DR_CONNECTOR_TYPE_CPU: - case SPAPR_DR_CONNECTOR_TYPE_LMB: - rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) && - (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) && - drc->configured && !drc->awaiting_release); - break; - case SPAPR_DR_CONNECTOR_TYPE_PHB: - case SPAPR_DR_CONNECTOR_TYPE_VIO: - default: - g_assert_not_reached(); - } - return rc; + * We need to migrate the state if it's not equal to the expected + * long-term state, which is the same as the coldplugged initial + * state */ + return (drc->state != drck->ready_state); } static const VMStateDescription vmstate_spapr_drc = { @@ -485,12 +477,7 @@ static const VMStateDescription vmstate_spapr_drc = { .minimum_version_id = 1, .needed = spapr_drc_needed, .fields = (VMStateField []) { - VMSTATE_UINT32(isolation_state, sPAPRDRConnector), - VMSTATE_UINT32(allocation_state, sPAPRDRConnector), - VMSTATE_UINT32(dr_indicator, sPAPRDRConnector), - VMSTATE_BOOL(configured, sPAPRDRConnector), - VMSTATE_BOOL(awaiting_release, sPAPRDRConnector), - VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector), + VMSTATE_UINT32(state, sPAPRDRConnector), VMSTATE_END_OF_LIST() } }; @@ -559,46 +546,96 @@ sPAPRDRConnector *spapr_dr_connector_new(Object *owner, const char *type, object_property_set_bool(OBJECT(drc), true, "realized", NULL); g_free(prop_name); - /* PCI slot always start in a USABLE state, and stay there */ - if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE; - } - return drc; } static void spapr_dr_connector_instance_init(Object *obj) { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); object_property_add_uint32_ptr(obj, "id", &drc->id, NULL); object_property_add(obj, "index", "uint32", prop_get_index, NULL, NULL, NULL, NULL); object_property_add(obj, "fdt", "struct", prop_get_fdt, NULL, NULL, NULL, NULL); + drc->state = drck->empty_state; } static void spapr_dr_connector_class_init(ObjectClass *k, void *data) { DeviceClass *dk = DEVICE_CLASS(k); - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); dk->realize = realize; dk->unrealize = unrealize; - drck->release_pending = release_pending; /* * Reason: it crashes FIXME find and document the real reason */ dk->user_creatable = false; } +static bool drc_physical_needed(void *opaque) +{ + sPAPRDRCPhysical *drcp = (sPAPRDRCPhysical *)opaque; + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(drcp); + + if ((drc->dev && (drcp->dr_indicator == SPAPR_DR_INDICATOR_ACTIVE)) + || (!drc->dev && (drcp->dr_indicator == SPAPR_DR_INDICATOR_INACTIVE))) { + return false; + } + return true; +} + +static const VMStateDescription vmstate_spapr_drc_physical = { + .name = "spapr_drc/physical", + .version_id = 1, + .minimum_version_id = 1, + .needed = drc_physical_needed, + .fields = (VMStateField []) { + VMSTATE_UINT32(dr_indicator, sPAPRDRCPhysical), + VMSTATE_END_OF_LIST() + } +}; + +static void drc_physical_reset(void *opaque) +{ + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(opaque); + sPAPRDRCPhysical *drcp = SPAPR_DRC_PHYSICAL(drc); + + if (drc->dev) { + drcp->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE; + } else { + drcp->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE; + } +} + +static void realize_physical(DeviceState *d, Error **errp) +{ + sPAPRDRCPhysical *drcp = SPAPR_DRC_PHYSICAL(d); + Error *local_err = NULL; + + realize(d, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + vmstate_register(DEVICE(drcp), spapr_drc_index(SPAPR_DR_CONNECTOR(drcp)), + &vmstate_spapr_drc_physical, drcp); + qemu_register_reset(drc_physical_reset, drcp); +} + static void spapr_drc_physical_class_init(ObjectClass *k, void *data) { + DeviceClass *dk = DEVICE_CLASS(k); sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + dk->realize = realize_physical; drck->dr_entity_sense = physical_entity_sense; drck->isolate = drc_isolate_physical; drck->unisolate = drc_unisolate_physical; + drck->ready_state = SPAPR_DRC_STATE_PHYSICAL_CONFIGURED; + drck->empty_state = SPAPR_DRC_STATE_PHYSICAL_POWERON; } static void spapr_drc_logical_class_init(ObjectClass *k, void *data) @@ -608,6 +645,8 @@ static void spapr_drc_logical_class_init(ObjectClass *k, void *data) drck->dr_entity_sense = logical_entity_sense; drck->isolate = drc_isolate_logical; drck->unisolate = drc_unisolate_logical; + drck->ready_state = SPAPR_DRC_STATE_LOGICAL_CONFIGURED; + drck->empty_state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE; } static void spapr_drc_cpu_class_init(ObjectClass *k, void *data) @@ -653,7 +692,7 @@ static const TypeInfo spapr_dr_connector_info = { static const TypeInfo spapr_drc_physical_info = { .name = TYPE_SPAPR_DRC_PHYSICAL, .parent = TYPE_SPAPR_DR_CONNECTOR, - .instance_size = sizeof(sPAPRDRConnector), + .instance_size = sizeof(sPAPRDRCPhysical), .class_init = spapr_drc_physical_class_init, .abstract = true, }; @@ -661,7 +700,6 @@ static const TypeInfo spapr_drc_physical_info = { static const TypeInfo spapr_drc_logical_info = { .name = TYPE_SPAPR_DRC_LOGICAL, .parent = TYPE_SPAPR_DR_CONNECTOR, - .instance_size = sizeof(sPAPRDRConnector), .class_init = spapr_drc_logical_class_init, .abstract = true, }; @@ -669,21 +707,18 @@ static const TypeInfo spapr_drc_logical_info = { static const TypeInfo spapr_drc_cpu_info = { .name = TYPE_SPAPR_DRC_CPU, .parent = TYPE_SPAPR_DRC_LOGICAL, - .instance_size = sizeof(sPAPRDRConnector), .class_init = spapr_drc_cpu_class_init, }; static const TypeInfo spapr_drc_pci_info = { .name = TYPE_SPAPR_DRC_PCI, .parent = TYPE_SPAPR_DRC_PHYSICAL, - .instance_size = sizeof(sPAPRDRConnector), .class_init = spapr_drc_pci_class_init, }; static const TypeInfo spapr_drc_lmb_info = { .name = TYPE_SPAPR_DRC_LMB, .parent = TYPE_SPAPR_DRC_LOGICAL, - .instance_size = sizeof(sPAPRDRConnector), .class_init = spapr_drc_lmb_class_init, }; @@ -896,12 +931,18 @@ static uint32_t rtas_set_dr_indicator(uint32_t idx, uint32_t state) { sPAPRDRConnector *drc = spapr_drc_by_index(idx); - if (!drc) { - return RTAS_OUT_PARAM_ERROR; + if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_PHYSICAL)) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + if ((state != SPAPR_DR_INDICATOR_INACTIVE) + && (state != SPAPR_DR_INDICATOR_ACTIVE) + && (state != SPAPR_DR_INDICATOR_IDENTIFY) + && (state != SPAPR_DR_INDICATOR_ACTION)) { + return RTAS_OUT_PARAM_ERROR; /* bad state parameter */ } trace_spapr_drc_set_dr_indicator(idx, state); - drc->dr_indicator = state; + SPAPR_DRC_PHYSICAL(drc)->dr_indicator = state; return RTAS_OUT_SUCCESS; } @@ -1011,7 +1052,7 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, uint64_t wa_offset; uint32_t drc_index; sPAPRDRConnector *drc; - sPAPRConfigureConnectorState *ccs; + sPAPRDRConnectorClass *drck; sPAPRDRCCResponse resp = SPAPR_DR_CC_RESPONSE_CONTINUE; int rc; @@ -1030,18 +1071,16 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, goto out; } - if (!drc->fdt) { - trace_spapr_rtas_ibm_configure_connector_missing_fdt(drc_index); + if ((drc->state != SPAPR_DRC_STATE_LOGICAL_UNISOLATE) + && (drc->state != SPAPR_DRC_STATE_PHYSICAL_UNISOLATE)) { + /* Need to unisolate the device before configuring */ rc = SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE; goto out; } - ccs = drc->ccs; - if (!ccs) { - ccs = g_new0(sPAPRConfigureConnectorState, 1); - ccs->fdt_offset = drc->fdt_start_offset; - drc->ccs = ccs; - } + g_assert(drc->fdt); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); do { uint32_t tag; @@ -1049,12 +1088,12 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, const struct fdt_property *prop; int fdt_offset_next, prop_len; - tag = fdt_next_tag(drc->fdt, ccs->fdt_offset, &fdt_offset_next); + tag = fdt_next_tag(drc->fdt, drc->ccs_offset, &fdt_offset_next); switch (tag) { case FDT_BEGIN_NODE: - ccs->fdt_depth++; - name = fdt_get_name(drc->fdt, ccs->fdt_offset, NULL); + drc->ccs_depth++; + name = fdt_get_name(drc->fdt, drc->ccs_offset, NULL); /* provide the name of the next OF node */ wa_offset = CC_VAL_DATA_OFFSET; @@ -1063,30 +1102,22 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, resp = SPAPR_DR_CC_RESPONSE_NEXT_CHILD; break; case FDT_END_NODE: - ccs->fdt_depth--; - if (ccs->fdt_depth == 0) { - sPAPRDRIsolationState state = drc->isolation_state; + drc->ccs_depth--; + if (drc->ccs_depth == 0) { uint32_t drc_index = spapr_drc_index(drc); - /* done sending the device tree, don't need to track - * the state anymore - */ + + /* done sending the device tree, move to configured state */ trace_spapr_drc_set_configured(drc_index); - if (state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) { - drc->configured = true; - } else { - /* guest should be not configuring an isolated device */ - trace_spapr_drc_set_configured_skipping(drc_index); - } - g_free(ccs); - drc->ccs = NULL; - ccs = NULL; + drc->state = drck->ready_state; + drc->ccs_offset = -1; + drc->ccs_depth = -1; resp = SPAPR_DR_CC_RESPONSE_SUCCESS; } else { resp = SPAPR_DR_CC_RESPONSE_PREV_PARENT; } break; case FDT_PROP: - prop = fdt_get_property_by_offset(drc->fdt, ccs->fdt_offset, + prop = fdt_get_property_by_offset(drc->fdt, drc->ccs_offset, &prop_len); name = fdt_string(drc->fdt, fdt32_to_cpu(prop->nameoff)); @@ -1111,8 +1142,8 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, /* keep seeking for an actionable tag */ break; } - if (ccs) { - ccs->fdt_offset = fdt_offset_next; + if (drc->ccs_offset >= 0) { + drc->ccs_offset = fdt_offset_next; } } while (resp == SPAPR_DR_CC_RESPONSE_CONTINUE); diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 587a3dacb2..f952b78237 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -42,8 +42,6 @@ #include "hw/ppc/spapr_ovec.h" #include <libfdt.h> -struct rtas_error_log { - uint32_t summary; #define RTAS_LOG_VERSION_MASK 0xff000000 #define RTAS_LOG_VERSION_6 0x06000000 #define RTAS_LOG_SEVERITY_MASK 0x00e00000 @@ -85,6 +83,9 @@ struct rtas_error_log { #define RTAS_LOG_TYPE_ECC_CORR 0x0000000a #define RTAS_LOG_TYPE_EPOW 0x00000040 #define RTAS_LOG_TYPE_HOTPLUG 0x000000e5 + +struct rtas_error_log { + uint32_t summary; uint32_t extended_length; } QEMU_PACKED; @@ -166,8 +167,7 @@ struct rtas_event_log_v6_epow { uint64_t reason_code; } QEMU_PACKED; -struct epow_log_full { - struct rtas_error_log hdr; +struct epow_extended_log { struct rtas_event_log_v6 v6hdr; struct rtas_event_log_v6_maina maina; struct rtas_event_log_v6_mainb mainb; @@ -205,8 +205,7 @@ struct rtas_event_log_v6_hp { union drc_identifier drc_id; } QEMU_PACKED; -struct hp_log_full { - struct rtas_error_log hdr; +struct hp_extended_log { struct rtas_event_log_v6 v6hdr; struct rtas_event_log_v6_maina maina; struct rtas_event_log_v6_mainb mainb; @@ -341,25 +340,26 @@ static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type) return source->irq; } -static void rtas_event_log_queue(int log_type, void *data) +static uint32_t spapr_event_log_entry_type(sPAPREventLogEntry *entry) { - sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1); + return entry->summary & RTAS_LOG_TYPE_MASK; +} - g_assert(data); - entry->log_type = log_type; - entry->data = data; +static void rtas_event_log_queue(sPAPRMachineState *spapr, + sPAPREventLogEntry *entry) +{ QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next); } -static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask) +static sPAPREventLogEntry *rtas_event_log_dequeue(sPAPRMachineState *spapr, + uint32_t event_mask) { - sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; QTAILQ_FOREACH(entry, &spapr->pending_events, next) { const sPAPREventSource *source = - rtas_event_log_to_source(spapr, entry->log_type); + rtas_event_log_to_source(spapr, + spapr_event_log_entry_type(entry)); if (source->mask & event_mask) { break; @@ -380,7 +380,8 @@ static bool rtas_event_log_contains(uint32_t event_mask) QTAILQ_FOREACH(entry, &spapr->pending_events, next) { const sPAPREventSource *source = - rtas_event_log_to_source(spapr, entry->log_type); + rtas_event_log_to_source(spapr, + spapr_event_log_entry_type(entry)); if (source->mask & event_mask) { return true; @@ -428,27 +429,28 @@ static void spapr_init_maina(struct rtas_event_log_v6_maina *maina, static void spapr_powerdown_req(Notifier *n, void *opaque) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - struct rtas_error_log *hdr; + sPAPREventLogEntry *entry; struct rtas_event_log_v6 *v6hdr; struct rtas_event_log_v6_maina *maina; struct rtas_event_log_v6_mainb *mainb; struct rtas_event_log_v6_epow *epow; - struct epow_log_full *new_epow; + struct epow_extended_log *new_epow; + entry = g_new(sPAPREventLogEntry, 1); new_epow = g_malloc0(sizeof(*new_epow)); - hdr = &new_epow->hdr; + entry->extended_log = new_epow; + v6hdr = &new_epow->v6hdr; maina = &new_epow->maina; mainb = &new_epow->mainb; epow = &new_epow->epow; - hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6 - | RTAS_LOG_SEVERITY_EVENT - | RTAS_LOG_DISPOSITION_NOT_RECOVERED - | RTAS_LOG_OPTIONAL_PART_PRESENT - | RTAS_LOG_TYPE_EPOW); - hdr->extended_length = cpu_to_be32(sizeof(*new_epow) - - sizeof(new_epow->hdr)); + entry->summary = RTAS_LOG_VERSION_6 + | RTAS_LOG_SEVERITY_EVENT + | RTAS_LOG_DISPOSITION_NOT_RECOVERED + | RTAS_LOG_OPTIONAL_PART_PRESENT + | RTAS_LOG_TYPE_EPOW; + entry->extended_length = sizeof(*new_epow); spapr_init_v6hdr(v6hdr); spapr_init_maina(maina, 3 /* Main-A, Main-B and EPOW */); @@ -468,7 +470,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL; epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC; - rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow); + rtas_event_log_queue(spapr, entry); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -480,28 +482,29 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, union drc_identifier *drc_id) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - struct hp_log_full *new_hp; - struct rtas_error_log *hdr; + sPAPREventLogEntry *entry; + struct hp_extended_log *new_hp; struct rtas_event_log_v6 *v6hdr; struct rtas_event_log_v6_maina *maina; struct rtas_event_log_v6_mainb *mainb; struct rtas_event_log_v6_hp *hp; - new_hp = g_malloc0(sizeof(struct hp_log_full)); - hdr = &new_hp->hdr; + entry = g_new(sPAPREventLogEntry, 1); + new_hp = g_malloc0(sizeof(struct hp_extended_log)); + entry->extended_log = new_hp; + v6hdr = &new_hp->v6hdr; maina = &new_hp->maina; mainb = &new_hp->mainb; hp = &new_hp->hp; - hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6 - | RTAS_LOG_SEVERITY_EVENT - | RTAS_LOG_DISPOSITION_NOT_RECOVERED - | RTAS_LOG_OPTIONAL_PART_PRESENT - | RTAS_LOG_INITIATOR_HOTPLUG - | RTAS_LOG_TYPE_HOTPLUG); - hdr->extended_length = cpu_to_be32(sizeof(*new_hp) - - sizeof(new_hp->hdr)); + entry->summary = RTAS_LOG_VERSION_6 + | RTAS_LOG_SEVERITY_EVENT + | RTAS_LOG_DISPOSITION_NOT_RECOVERED + | RTAS_LOG_OPTIONAL_PART_PRESENT + | RTAS_LOG_INITIATOR_HOTPLUG + | RTAS_LOG_TYPE_HOTPLUG; + entry->extended_length = sizeof(*new_hp); spapr_init_v6hdr(v6hdr); spapr_init_maina(maina, 3 /* Main-A, Main-B, HP */); @@ -551,7 +554,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, cpu_to_be32(drc_id->count_indexed.index); } - rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp); + rtas_event_log_queue(spapr, entry); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -628,7 +631,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t mask, buf, len, event_len; uint64_t xinfo; sPAPREventLogEntry *event; - struct rtas_error_log *hdr; + struct rtas_error_log header; int i; if ((nargs < 6) || (nargs > 7) || nret != 1) { @@ -644,21 +647,24 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, xinfo |= (uint64_t)rtas_ld(args, 6) << 32; } - event = rtas_event_log_dequeue(mask); + event = rtas_event_log_dequeue(spapr, mask); if (!event) { goto out_no_events; } - hdr = event->data; - event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr); + event_len = event->extended_length + sizeof(header); if (event_len < len) { len = event_len; } - cpu_physical_memory_write(buf, event->data, len); + header.summary = cpu_to_be32(event->summary); + header.extended_length = cpu_to_be32(event->extended_length); + cpu_physical_memory_write(buf, &header, sizeof(header)); + cpu_physical_memory_write(buf + sizeof(header), event->extended_log, + event->extended_length); rtas_st(rets, 0, RTAS_OUT_SUCCESS); - g_free(event->data); + g_free(event->extended_log); g_free(event); /* according to PAPR+, the IRQ must be left asserted, or re-asserted, if diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 8624ce8d5b..72ea5a8247 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -3,6 +3,7 @@ #include "sysemu/hw_accel.h" #include "sysemu/sysemu.h" #include "qemu/log.h" +#include "qemu/error-report.h" #include "cpu.h" #include "exec/exec-all.h" #include "helper_regs.h" @@ -354,6 +355,401 @@ static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr, return H_SUCCESS; } +struct sPAPRPendingHPT { + /* These fields are read-only after initialization */ + int shift; + QemuThread thread; + + /* These fields are protected by the BQL */ + bool complete; + + /* These fields are private to the preparation thread if + * !complete, otherwise protected by the BQL */ + int ret; + void *hpt; +}; + +static void free_pending_hpt(sPAPRPendingHPT *pending) +{ + if (pending->hpt) { + qemu_vfree(pending->hpt); + } + + g_free(pending); +} + +static void *hpt_prepare_thread(void *opaque) +{ + sPAPRPendingHPT *pending = opaque; + size_t size = 1ULL << pending->shift; + + pending->hpt = qemu_memalign(size, size); + if (pending->hpt) { + memset(pending->hpt, 0, size); + pending->ret = H_SUCCESS; + } else { + pending->ret = H_NO_MEM; + } + + qemu_mutex_lock_iothread(); + + if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { + /* Ready to go */ + pending->complete = true; + } else { + /* We've been cancelled, clean ourselves up */ + free_pending_hpt(pending); + } + + qemu_mutex_unlock_iothread(); + return NULL; +} + +/* Must be called with BQL held */ +static void cancel_hpt_prepare(sPAPRMachineState *spapr) +{ + sPAPRPendingHPT *pending = spapr->pending_hpt; + + /* Let the thread know it's cancelled */ + spapr->pending_hpt = NULL; + + if (!pending) { + /* Nothing to do */ + return; + } + + if (!pending->complete) { + /* thread will clean itself up */ + return; + } + + free_pending_hpt(pending); +} + +/* Convert a return code from the KVM ioctl()s implementing resize HPT + * into a PAPR hypercall return code */ +static target_ulong resize_hpt_convert_rc(int ret) +{ + if (ret >= 100000) { + return H_LONG_BUSY_ORDER_100_SEC; + } else if (ret >= 10000) { + return H_LONG_BUSY_ORDER_10_SEC; + } else if (ret >= 1000) { + return H_LONG_BUSY_ORDER_1_SEC; + } else if (ret >= 100) { + return H_LONG_BUSY_ORDER_100_MSEC; + } else if (ret >= 10) { + return H_LONG_BUSY_ORDER_10_MSEC; + } else if (ret > 0) { + return H_LONG_BUSY_ORDER_1_MSEC; + } + + switch (ret) { + case 0: + return H_SUCCESS; + case -EPERM: + return H_AUTHORITY; + case -EINVAL: + return H_PARAMETER; + case -ENXIO: + return H_CLOSED; + case -ENOSPC: + return H_PTEG_FULL; + case -EBUSY: + return H_BUSY; + case -ENOMEM: + return H_NO_MEM; + default: + return H_HARDWARE; + } +} + +static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + int shift = args[1]; + sPAPRPendingHPT *pending = spapr->pending_hpt; + uint64_t current_ram_size = MACHINE(spapr)->ram_size; + int rc; + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { + return H_AUTHORITY; + } + + if (!spapr->htab_shift) { + /* Radix guest, no HPT */ + return H_NOT_AVAILABLE; + } + + trace_spapr_h_resize_hpt_prepare(flags, shift); + + if (flags != 0) { + return H_PARAMETER; + } + + if (shift && ((shift < 18) || (shift > 46))) { + return H_PARAMETER; + } + + current_ram_size = pc_existing_dimms_capacity(&error_fatal); + + /* We only allow the guest to allocate an HPT one order above what + * we'd normally give them (to stop a small guest claiming a huge + * chunk of resources in the HPT */ + if (shift > (spapr_hpt_shift_for_ramsize(current_ram_size) + 1)) { + return H_RESOURCE; + } + + rc = kvmppc_resize_hpt_prepare(cpu, flags, shift); + if (rc != -ENOSYS) { + return resize_hpt_convert_rc(rc); + } + + if (pending) { + /* something already in progress */ + if (pending->shift == shift) { + /* and it's suitable */ + if (pending->complete) { + return pending->ret; + } else { + return H_LONG_BUSY_ORDER_100_MSEC; + } + } + + /* not suitable, cancel and replace */ + cancel_hpt_prepare(spapr); + } + + if (!shift) { + /* nothing to do */ + return H_SUCCESS; + } + + /* start new prepare */ + + pending = g_new0(sPAPRPendingHPT, 1); + pending->shift = shift; + pending->ret = H_HARDWARE; + + qemu_thread_create(&pending->thread, "sPAPR HPT prepare", + hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); + + spapr->pending_hpt = pending; + + /* In theory we could estimate the time more accurately based on + * the new size, but there's not much point */ + return H_LONG_BUSY_ORDER_100_MSEC; +} + +static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + return ldq_p(addr); +} + +static void new_hpte_store(void *htab, uint64_t pteg, int slot, + uint64_t pte0, uint64_t pte1) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + + stq_p(addr, pte0); + stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1); +} + +static int rehash_hpte(PowerPCCPU *cpu, + const ppc_hash_pte64_t *hptes, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize, + uint64_t pteg, int slot) +{ + uint64_t old_hash_mask = (oldsize >> 7) - 1; + uint64_t new_hash_mask = (newsize >> 7) - 1; + target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); + target_ulong pte1; + uint64_t avpn; + unsigned base_pg_shift; + uint64_t hash, new_pteg, replace_pte0; + + if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { + return H_SUCCESS; + } + + pte1 = ppc_hash64_hpte1(cpu, hptes, slot); + + base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); + assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ + avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); + + if (pte0 & HPTE64_V_SECONDARY) { + pteg = ~pteg; + } + + if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { + uint64_t offset, vsid; + + /* We only have 28 - 23 bits of offset in avpn */ + offset = (avpn & 0x1f) << 23; + vsid = avpn >> 5; + /* We can find more bits from the pteg value */ + if (base_pg_shift < 23) { + offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift; + } + + hash = vsid ^ (offset >> base_pg_shift); + } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { + uint64_t offset, vsid; + + /* We only have 40 - 23 bits of seg_off in avpn */ + offset = (avpn & 0x1ffff) << 23; + vsid = avpn >> 17; + if (base_pg_shift < 23) { + offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) + << base_pg_shift; + } + + hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); + } else { + error_report("rehash_pte: Bad segment size in HPTE"); + return H_HARDWARE; + } + + new_pteg = hash & new_hash_mask; + if (pte0 & HPTE64_V_SECONDARY) { + assert(~pteg == (hash & old_hash_mask)); + new_pteg = ~new_pteg; + } else { + assert(pteg == (hash & old_hash_mask)); + } + assert((oldsize != newsize) || (pteg == new_pteg)); + replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); + /* + * Strictly speaking, we don't need all these tests, since we only + * ever rehash bolted HPTEs. We might in future handle non-bolted + * HPTEs, though so make the logic correct for those cases as + * well. + */ + if (replace_pte0 & HPTE64_V_VALID) { + assert(newsize < oldsize); + if (replace_pte0 & HPTE64_V_BOLTED) { + if (pte0 & HPTE64_V_BOLTED) { + /* Bolted collision, nothing we can do */ + return H_PTEG_FULL; + } else { + /* Discard this hpte */ + return H_SUCCESS; + } + } + } + + new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); + return H_SUCCESS; +} + +static int rehash_hpt(PowerPCCPU *cpu, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize) +{ + uint64_t n_ptegs = oldsize >> 7; + uint64_t pteg; + int slot; + int rc; + + for (pteg = 0; pteg < n_ptegs; pteg++) { + hwaddr ptex = pteg * HPTES_PER_GROUP; + const ppc_hash_pte64_t *hptes + = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + + if (!hptes) { + return H_HARDWARE; + } + + for (slot = 0; slot < HPTES_PER_GROUP; slot++) { + rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, + pteg, slot); + if (rc != H_SUCCESS) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + return rc; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + } + + return H_SUCCESS; +} + +static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong shift = args[1]; + sPAPRPendingHPT *pending = spapr->pending_hpt; + int rc; + size_t newsize; + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { + return H_AUTHORITY; + } + + trace_spapr_h_resize_hpt_commit(flags, shift); + + rc = kvmppc_resize_hpt_commit(cpu, flags, shift); + if (rc != -ENOSYS) { + return resize_hpt_convert_rc(rc); + } + + if (flags != 0) { + return H_PARAMETER; + } + + if (!pending || (pending->shift != shift)) { + /* no matching prepare */ + return H_CLOSED; + } + + if (!pending->complete) { + /* prepare has not completed */ + return H_BUSY; + } + + /* Shouldn't have got past PREPARE without an HPT */ + g_assert(spapr->htab_shift); + + newsize = 1ULL << pending->shift; + rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), + pending->hpt, newsize); + if (rc == H_SUCCESS) { + qemu_vfree(spapr->htab); + spapr->htab = pending->hpt; + spapr->htab_shift = pending->shift; + + if (kvm_enabled()) { + /* For KVM PR, update the HPT pointer */ + target_ulong sdr1 = (target_ulong)(uintptr_t)spapr->htab + | (spapr->htab_shift - 18); + kvmppc_update_sdr1(sdr1); + } + + pending->hpt = NULL; /* so it's not free()d */ + } + + /* Clean up */ + spapr->pending_hpt = NULL; + free_pending_hpt(pending); + + return rc; +} + static target_ulong h_set_sprg0(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { @@ -1133,6 +1529,45 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300); spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300); + /* + * HPT resizing is a bit of a special case, because when enabled + * we assume an HPT guest will support it until it says it + * doesn't, instead of assuming it won't support it until it says + * it does. Strictly speaking that approach could break for + * guests which don't make a CAS call, but those are so old we + * don't care about them. Without that assumption we'd have to + * make at least a temporary allocation of an HPT sized for max + * memory, which could be impossibly difficult under KVM HV if + * maxram is large. + */ + if (!guest_radix && !spapr_ovec_test(ov5_guest, OV5_HPT_RESIZE)) { + int maxshift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size); + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_REQUIRED) { + error_report( + "h_client_architecture_support: Guest doesn't support HPT resizing, but resize-hpt=required"); + exit(1); + } + + if (spapr->htab_shift < maxshift) { + CPUState *cs; + + /* Guest doesn't know about HPT resizing, so we + * pre-emptively resize for the maximum permitted RAM. At + * the point this is called, nothing should have been + * entered into the existing HPT */ + spapr_reallocate_hpt(spapr, maxshift, &error_fatal); + CPU_FOREACH(cs) { + if (kvm_enabled()) { + /* For KVM PR, update the HPT pointer */ + target_ulong sdr1 = (target_ulong)(uintptr_t)spapr->htab + | (spapr->htab_shift - 18); + kvmppc_update_sdr1(sdr1); + } + } + } + } + /* NOTE: there are actually a number of ov5 bits where input from the * guest is always zero, and the platform/QEMU enables them independently * of guest input. To model these properly we'd want some sort of mask, @@ -1246,6 +1681,10 @@ static void hypercall_register_types(void) /* hcall-bulk */ spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); + /* hcall-hpt-resize */ + spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare); + spapr_register_hypercall(H_RESIZE_HPT_COMMIT, h_resize_hpt_commit); + /* hcall-splpar */ spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa); spapr_register_hypercall(H_CEDE, h_cede); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a52dcf8ec0..6ecdf29d28 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1443,7 +1443,9 @@ static void spapr_pci_plug(HotplugHandler *plug_handler, /* If this is function 0, signal hotplug for all the device functions. * Otherwise defer sending the hotplug event. */ - if (plugged_dev->hotplugged && PCI_FUNC(pdev->devfn) == 0) { + if (!spapr_drc_hotplugged(plugged_dev)) { + spapr_drc_reset(drc); + } else if (PCI_FUNC(pdev->devfn) == 0) { int i; for (i = 0; i < 8; i++) { @@ -1474,9 +1476,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, { sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); PCIDevice *pdev = PCI_DEVICE(plugged_dev); - sPAPRDRConnectorClass *drck; sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); - Error *local_err = NULL; if (!phb->dr_enabled) { error_setg(errp, QERR_BUS_NO_HOTPLUG, @@ -1487,8 +1487,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, g_assert(drc); g_assert(drc->dev == plugged_dev); - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - if (!drck->release_pending(drc)) { + if (!spapr_drc_unplug_requested(drc)) { PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))); uint32_t slotnr = PCI_SLOT(pdev->devfn); sPAPRDRConnector *func_drc; @@ -1504,7 +1503,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc); state = func_drck->dr_entity_sense(func_drc); if (state == SPAPR_DR_ENTITY_SENSE_PRESENT - && !func_drck->release_pending(func_drc)) { + && !spapr_drc_unplug_requested(func_drc)) { error_setg(errp, "PCI: slot %d, function %d still present. " "Must unplug all non-0 functions first.", @@ -1514,11 +1513,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, } } - spapr_drc_detach(drc, DEVICE(pdev), &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + spapr_drc_detach(drc); /* if this isn't func 0, defer unplug event. otherwise signal removal * for all present functions diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events index 3e8e3cffde..0f7d9be4ef 100644 --- a/hw/ppc/trace-events +++ b/hw/ppc/trace-events @@ -16,6 +16,8 @@ spapr_cas_continue(unsigned long n) "Copy changes to the guest: %ld bytes" # hw/ppc/spapr_hcall.c spapr_cas_pvr_try(uint32_t pvr) "%x" spapr_cas_pvr(uint32_t cur_pvr, bool explicit_match, uint32_t new_pvr) "current=%x, explicit_match=%u, new=%x" +spapr_h_resize_hpt_prepare(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64 +spapr_h_resize_hpt_commit(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64 # hw/ppc/spapr_iommu.c spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64 @@ -46,8 +48,7 @@ spapr_drc_set_configured(uint32_t index) "drc: 0x%"PRIx32 spapr_drc_set_configured_skipping(uint32_t index) "drc: 0x%"PRIx32", isolated device" spapr_drc_attach(uint32_t index) "drc: 0x%"PRIx32 spapr_drc_detach(uint32_t index) "drc: 0x%"PRIx32 -spapr_drc_awaiting_isolated(uint32_t index) "drc: 0x%"PRIx32 -spapr_drc_awaiting_unusable(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_awaiting_quiesce(uint32_t index) "drc: 0x%"PRIx32 spapr_drc_awaiting_allocation(uint32_t index) "drc: 0x%"PRIx32 spapr_drc_reset(uint32_t index) "drc: 0x%"PRIx32 spapr_drc_realize(uint32_t index) "drc: 0x%"PRIx32 |