aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorDaniel Henrique Barboza <danielhb413@gmail.com>2021-03-31 21:04:36 -0300
committerDavid Gibson <david@gibson.dropbear.id.au>2021-04-12 12:27:14 +1000
commitd522cb52e604c8448674d90dae09ad50223b5d3c (patch)
treec63481a9bb958697c0d26d6ac4ba3fdd3107e7e3 /hw
parent555249a59e9cdd6b58da103aba5cf3a2d45c899f (diff)
spapr: rollback 'unplug timeout' for CPU hotunplugs
The pseries machines introduced the concept of 'unplug timeout' for CPU hotunplugs. The idea was to circunvent a deficiency in the pSeries specification (PAPR), that currently does not define a proper way for the hotunplug to fail. If the guest refuses to release the CPU (see [1] for an example) there is no way for QEMU to detect the failure. Further discussions about how to send a QAPI event to inform about the hotunplug timeout [2] exposed problems that weren't predicted back when the idea was developed. Other QEMU machines don't have any type of hotunplug timeout mechanism for any device, e.g. ACPI based machines have a way to make hotunplug errors visible to the hypervisor. This would make this timeout mechanism exclusive to pSeries, which is not ideal. The real problem is that a QAPI event that reports hotunplug timeouts puts the management layer (namely Libvirt) in a weird spot. We're not telling that the hotunplug failed, because we can't be 100% sure of that, and yet we're resetting the unplug state back, preventing any DEVICE_DEL events to reach out in case the guest decides to release the device. Libvirt would need to inspect the guest itself to see if the device was released or not, otherwise the internal domain states will be inconsistent. Moreover, Libvirt already has an 'unplug timeout' concept, and a QEMU side timeout would need to be juggled together with the existing Libvirt timeout. All this considered, this solution ended up creating more trouble than it solved. This patch reverts the 3 commits that introduced the timeout mechanism for CPU hotplugs in pSeries machines. This reverts commit 4515a5f786024fabf0bef4cf3d28adf5647e6e82 "qemu_timer.c: add timer_deadline_ms() helper" This reverts commit d1c2e3ce3d5a5424651967bce1cf1f4caa0c6d91 "spapr_drc.c: add hotunplug timeout for CPUs" This reverts commit 51254ffb320183a4636635840c23ee0e3a1efffa "spapr_drc.c: introduce unplug_timeout_timer" [1] https://bugzilla.redhat.com/show_bug.cgi?id=1911414 [2] https://lists.gnu.org/archive/html/qemu-devel/2021-03/msg04682.html CC: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> Message-Id: <20210401000437.131140-2-danielhb413@gmail.com> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Diffstat (limited to 'hw')
-rw-r--r--hw/ppc/spapr.c4
-rw-r--r--hw/ppc/spapr_drc.c52
2 files changed, 0 insertions, 56 deletions
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 73a06df3b1..05a765fab4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -3778,10 +3778,6 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev,
if (!spapr_drc_unplug_requested(drc)) {
spapr_drc_unplug_request(drc);
spapr_hotplug_req_remove_by_index(drc);
- } else {
- error_setg(errp, "core-id %d unplug is still pending, %d seconds "
- "timeout remaining",
- cc->core_id, spapr_drc_unplug_timeout_remaining_sec(drc));
}
}
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 8a71b03800..9e16505fa1 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -57,8 +57,6 @@ static void spapr_drc_release(SpaprDrc *drc)
drck->release(drc->dev);
drc->unplug_requested = false;
- timer_del(drc->unplug_timeout_timer);
-
g_free(drc->fdt);
drc->fdt = NULL;
drc->fdt_start_offset = 0;
@@ -372,17 +370,6 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
} while (fdt_depth != 0);
}
-static void spapr_drc_start_unplug_timeout_timer(SpaprDrc *drc)
-{
- SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-
- if (drck->unplug_timeout_seconds != 0) {
- timer_mod(drc->unplug_timeout_timer,
- qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
- drck->unplug_timeout_seconds * 1000);
- }
-}
-
void spapr_drc_attach(SpaprDrc *drc, DeviceState *d)
{
trace_spapr_drc_attach(spapr_drc_index(drc));
@@ -409,8 +396,6 @@ void spapr_drc_unplug_request(SpaprDrc *drc)
drc->unplug_requested = true;
- spapr_drc_start_unplug_timeout_timer(drc);
-
if (drc->state != drck->empty_state) {
trace_spapr_drc_awaiting_quiesce(spapr_drc_index(drc));
return;
@@ -419,15 +404,6 @@ void spapr_drc_unplug_request(SpaprDrc *drc)
spapr_drc_release(drc);
}
-int spapr_drc_unplug_timeout_remaining_sec(SpaprDrc *drc)
-{
- if (drc->unplug_requested) {
- return timer_deadline_ms(drc->unplug_timeout_timer) / 1000;
- }
-
- return 0;
-}
-
bool spapr_drc_reset(SpaprDrc *drc)
{
SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
@@ -499,23 +475,11 @@ static bool spapr_drc_needed(void *opaque)
spapr_drc_unplug_requested(drc);
}
-static int spapr_drc_post_load(void *opaque, int version_id)
-{
- SpaprDrc *drc = opaque;
-
- if (drc->unplug_requested) {
- spapr_drc_start_unplug_timeout_timer(drc);
- }
-
- return 0;
-}
-
static const VMStateDescription vmstate_spapr_drc = {
.name = "spapr_drc",
.version_id = 1,
.minimum_version_id = 1,
.needed = spapr_drc_needed,
- .post_load = spapr_drc_post_load,
.fields = (VMStateField []) {
VMSTATE_UINT32(state, SpaprDrc),
VMSTATE_END_OF_LIST()
@@ -526,15 +490,6 @@ static const VMStateDescription vmstate_spapr_drc = {
}
};
-static void drc_unplug_timeout_cb(void *opaque)
-{
- SpaprDrc *drc = opaque;
-
- if (drc->unplug_requested) {
- drc->unplug_requested = false;
- }
-}
-
static void drc_realize(DeviceState *d, Error **errp)
{
SpaprDrc *drc = SPAPR_DR_CONNECTOR(d);
@@ -557,11 +512,6 @@ static void drc_realize(DeviceState *d, Error **errp)
object_property_add_alias(root_container, link_name,
drc->owner, child_name);
g_free(link_name);
-
- drc->unplug_timeout_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
- drc_unplug_timeout_cb,
- drc);
-
vmstate_register(VMSTATE_IF(drc), spapr_drc_index(drc), &vmstate_spapr_drc,
drc);
trace_spapr_drc_realize_complete(spapr_drc_index(drc));
@@ -579,7 +529,6 @@ static void drc_unrealize(DeviceState *d)
name = g_strdup_printf("%x", spapr_drc_index(drc));
object_property_del(root_container, name);
g_free(name);
- timer_free(drc->unplug_timeout_timer);
}
SpaprDrc *spapr_dr_connector_new(Object *owner, const char *type,
@@ -721,7 +670,6 @@ static void spapr_drc_cpu_class_init(ObjectClass *k, void *data)
drck->drc_name_prefix = "CPU ";
drck->release = spapr_core_release;
drck->dt_populate = spapr_core_dt_populate;
- drck->unplug_timeout_seconds = 15;
}
static void spapr_drc_pci_class_init(ObjectClass *k, void *data)