about summary refs log tree commit diff
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r-- hw/hyperv/vmbus.c 20
-rw-r--r-- hw/ppc/Kconfig 5
-rw-r--r-- hw/ppc/meson.build 3
-rw-r--r-- hw/ppc/pegasos2.c 789
-rw-r--r-- hw/ppc/spapr.c 77
-rw-r--r-- hw/ppc/spapr_caps.c 41
-rw-r--r-- hw/ppc/spapr_hcall.c 24
-rw-r--r-- hw/ppc/spapr_vof.c 167
-rw-r--r-- hw/ppc/trace-events 24
-rw-r--r-- hw/ppc/vof.c 1053
-rw-r--r-- hw/vfio/common.c 315
-rw-r--r-- hw/virtio/vhost-user.c 3
-rw-r--r-- hw/virtio/virtio-mem.c 391
13 files changed, 2813 insertions, 99 deletions
diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c
index 984caf898d..c9887d5a7b 100644
--- a/hw/hyperv/vmbus.c
+++ b/hw/hyperv/vmbus.c
@@ -2372,6 +2372,14 @@ static void vmbus_dev_realize(DeviceState *dev, Error **errp)
assert(!qemu_uuid_is_null(&vdev->instanceid));
+ if (!qemu_uuid_is_null(&vdc->instanceid)) {
+ /* Class wants to only have a single instance with a fixed UUID */
+ if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
+ error_setg(&err, "instance id can't be changed");
+ goto error_out;
+ }
+ }
+
/* Check for instance id collision for this class id */
QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
@@ -2438,18 +2446,22 @@ static void vmbus_dev_unrealize(DeviceState *dev)
free_channels(vdev);
}
+static Property vmbus_dev_props[] = {
+ DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
+ DEFINE_PROP_END_OF_LIST()
+};
+
+
static void vmbus_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *kdev = DEVICE_CLASS(klass);
+ device_class_set_props(kdev, vmbus_dev_props);
kdev->bus_type = TYPE_VMBUS;
kdev->realize = vmbus_dev_realize;
kdev->unrealize = vmbus_dev_unrealize;
kdev->reset = vmbus_dev_reset;
}
-static Property vmbus_dev_instanceid =
- DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid);
-
static void vmbus_dev_instance_init(Object *obj)
{
VMBusDevice *vdev = VMBUS_DEVICE(obj);
@@ -2458,8 +2470,6 @@ static void vmbus_dev_instance_init(Object *obj)
if (!qemu_uuid_is_null(&vdc->instanceid)) {
/* Class wants to only have a single instance with a fixed UUID */
vdev->instanceid = vdc->instanceid;
- } else {
- qdev_property_add_static(DEVICE(vdev), &vmbus_dev_instanceid);
}
}
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index 66e0b15d9e..7fcafec60a 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -13,6 +13,7 @@ config PSERIES
select MSI_NONBROKEN
select FDT_PPC
select CHRP_NVRAM
+ select VOF
config SPAPR_RNG
bool
@@ -75,6 +76,7 @@ config PEGASOS2
select VT82C686
select IDE_VIA
select SMBUS_EEPROM
+ select VOF
# This should come with VT82C686
select ACPI_X86
@@ -144,3 +146,6 @@ config FW_CFG_PPC
config FDT_PPC
bool
+
+config VOF
+ bool
diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
index 597d974dd4..aa4c8e6a2e 100644
--- a/hw/ppc/meson.build
+++ b/hw/ppc/meson.build
@@ -84,4 +84,7 @@ ppc_ss.add(when: 'CONFIG_VIRTEX', if_true: files('virtex_ml507.c'))
# Pegasos2
ppc_ss.add(when: 'CONFIG_PEGASOS2', if_true: files('pegasos2.c'))
+ppc_ss.add(when: 'CONFIG_VOF', if_true: files('vof.c'))
+ppc_ss.add(when: ['CONFIG_VOF', 'CONFIG_PSERIES'], if_true: files('spapr_vof.c'))
+
hw_arch += {'ppc': ppc_ss}
diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
index 0bfd0928aa..9a6ae867e4 100644
--- a/hw/ppc/pegasos2.c
+++ b/hw/ppc/pegasos2.c
@@ -1,7 +1,7 @@
/*
* QEMU PowerPC CHRP (Genesi/bPlan Pegasos II) hardware System Emulator
*
- * Copyright (c) 2018-2020 BALATON Zoltan
+ * Copyright (c) 2018-2021 BALATON Zoltan
*
* This work is licensed under the GNU GPL license version 2 or later.
*
@@ -34,26 +34,68 @@
#include "trace.h"
#include "qemu/datadir.h"
#include "sysemu/device_tree.h"
+#include "hw/ppc/vof.h"
-#define PROM_FILENAME "pegasos2.rom"
+#include <libfdt.h>
+
+#define PROM_FILENAME "vof.bin"
#define PROM_ADDR 0xfff00000
#define PROM_SIZE 0x80000
+#define KVMPPC_HCALL_BASE 0xf000
+#define KVMPPC_H_RTAS (KVMPPC_HCALL_BASE + 0x0)
+#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5)
+
+#define H_SUCCESS 0
+#define H_PRIVILEGE -3 /* Caller not privileged */
+#define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */
+
#define BUS_FREQ_HZ 133333333
+#define PCI0_MEM_BASE 0xc0000000
+#define PCI0_MEM_SIZE 0x20000000
+#define PCI0_IO_BASE 0xf8000000
+#define PCI0_IO_SIZE 0x10000
+
+#define PCI1_MEM_BASE 0x80000000
+#define PCI1_MEM_SIZE 0x40000000
+#define PCI1_IO_BASE 0xfe000000
+#define PCI1_IO_SIZE 0x10000
+
+#define TYPE_PEGASOS2_MACHINE MACHINE_TYPE_NAME("pegasos2")
+OBJECT_DECLARE_TYPE(Pegasos2MachineState, MachineClass, PEGASOS2_MACHINE)
+
+struct Pegasos2MachineState {
+ MachineState parent_obj;
+ PowerPCCPU *cpu;
+ DeviceState *mv;
+ Vof *vof;
+ void *fdt_blob;
+ uint64_t kernel_addr;
+ uint64_t kernel_entry;
+ uint64_t kernel_size;
+};
+
+static void *build_fdt(MachineState *machine, int *fdt_size);
+
static void pegasos2_cpu_reset(void *opaque)
{
PowerPCCPU *cpu = opaque;
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(current_machine);
cpu_reset(CPU(cpu));
cpu->env.spr[SPR_HID1] = 7ULL << 28;
+ if (pm->vof) {
+ cpu->env.gpr[1] = 2 * VOF_STACK_SIZE - 0x20;
+ cpu->env.nip = 0x100;
+ }
}
static void pegasos2_init(MachineState *machine)
{
- PowerPCCPU *cpu = NULL;
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
+ CPUPPCState *env;
MemoryRegion *rom = g_new(MemoryRegion, 1);
- DeviceState *mv;
PCIBus *pci_bus;
PCIDevice *dev;
I2CBus *i2c_bus;
@@ -63,15 +105,16 @@ static void pegasos2_init(MachineState *machine)
uint8_t *spd_data;
/* init CPU */
- cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
- if (PPC_INPUT(&cpu->env) != PPC_FLAGS_INPUT_6xx) {
+ pm->cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
+ env = &pm->cpu->env;
+ if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
error_report("Incompatible CPU, only 6xx bus supported");
exit(1);
}
/* Set time-base frequency */
- cpu_ppc_tb_init(&cpu->env, BUS_FREQ_HZ / 4);
- qemu_register_reset(pegasos2_cpu_reset, cpu);
+ cpu_ppc_tb_init(env, BUS_FREQ_HZ / 4);
+ qemu_register_reset(pegasos2_cpu_reset, pm->cpu);
/* RAM */
memory_region_add_subregion(get_system_memory(), 0, machine->ram);
@@ -82,30 +125,36 @@ static void pegasos2_init(MachineState *machine)
error_report("Could not find firmware '%s'", fwname);
exit(1);
}
+ if (!machine->firmware && !pm->vof) {
+ pm->vof = g_malloc0(sizeof(*pm->vof));
+ }
memory_region_init_rom(rom, NULL, "pegasos2.rom", PROM_SIZE, &error_fatal);
memory_region_add_subregion(get_system_memory(), PROM_ADDR, rom);
sz = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1,
PPC_ELF_MACHINE, 0, 0);
if (sz <= 0) {
- sz = load_image_targphys(filename, PROM_ADDR, PROM_SIZE);
+ sz = load_image_targphys(filename, pm->vof ? 0 : PROM_ADDR, PROM_SIZE);
}
if (sz <= 0 || sz > PROM_SIZE) {
error_report("Could not load firmware '%s'", filename);
exit(1);
}
g_free(filename);
+ if (pm->vof) {
+ pm->vof->fw_size = sz;
+ }
/* Marvell Discovery II system controller */
- mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1,
- ((qemu_irq *)cpu->env.irq_inputs)[PPC6xx_INPUT_INT]));
- pci_bus = mv64361_get_pci_bus(mv, 1);
+ pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1,
+ ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]));
+ pci_bus = mv64361_get_pci_bus(pm->mv, 1);
/* VIA VT8231 South Bridge (multifunction PCI device) */
/* VT8231 function 0: PCI-to-ISA Bridge */
dev = pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0), true,
TYPE_VT8231_ISA);
qdev_connect_gpio_out(DEVICE(dev), 0,
- qdev_get_gpio_in_named(mv, "gpp", 31));
+ qdev_get_gpio_in_named(pm->mv, "gpp", 31));
/* VT8231 function 1: IDE Controller */
dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 1), "via-ide");
@@ -127,18 +176,728 @@ static void pegasos2_init(MachineState *machine)
/* other PC hardware */
pci_vga_init(pci_bus);
+
+ if (machine->kernel_filename) {
+ sz = load_elf(machine->kernel_filename, NULL, NULL, NULL,
+ &pm->kernel_entry, &pm->kernel_addr, NULL, NULL, 1,
+ PPC_ELF_MACHINE, 0, 0);
+ if (sz <= 0) {
+ error_report("Could not load kernel '%s'",
+ machine->kernel_filename);
+ exit(1);
+ }
+ pm->kernel_size = sz;
+ if (!pm->vof) {
+ warn_report("Option -kernel may be ineffective with -bios.");
+ }
+ }
+ if (machine->kernel_cmdline && !pm->vof) {
+ warn_report("Option -append may be ineffective with -bios.");
+ }
+}
+
+static uint32_t pegasos2_pci_config_read(AddressSpace *as, int bus,
+ uint32_t addr, uint32_t len)
+{
+ hwaddr pcicfg = (bus ? 0xf1000c78 : 0xf1000cf8);
+ uint32_t val = 0xffffffff;
+
+ stl_le_phys(as, pcicfg, addr | BIT(31));
+ switch (len) {
+ case 4:
+ val = ldl_le_phys(as, pcicfg + 4);
+ break;
+ case 2:
+ val = lduw_le_phys(as, pcicfg + 4);
+ break;
+ case 1:
+ val = ldub_phys(as, pcicfg + 4);
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid length\n", __func__);
+ break;
+ }
+ return val;
+}
+
+static void pegasos2_pci_config_write(AddressSpace *as, int bus, uint32_t addr,
+ uint32_t len, uint32_t val)
+{
+ hwaddr pcicfg = (bus ? 0xf1000c78 : 0xf1000cf8);
+
+ stl_le_phys(as, pcicfg, addr | BIT(31));
+ switch (len) {
+ case 4:
+ stl_le_phys(as, pcicfg + 4, val);
+ break;
+ case 2:
+ stw_le_phys(as, pcicfg + 4, val);
+ break;
+ case 1:
+ stb_phys(as, pcicfg + 4, val);
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid length\n", __func__);
+ break;
+ }
+}
+
+static void pegasos2_machine_reset(MachineState *machine)
+{
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
+ AddressSpace *as = CPU(pm->cpu)->as;
+ void *fdt;
+ uint64_t d[2];
+ int sz;
+
+ qemu_devices_reset();
+ if (!pm->vof) {
+ return; /* Firmware should set up machine so nothing to do */
+ }
+
+ /* Otherwise, set up devices that board firmware would normally do */
+ stl_le_phys(as, 0xf1000000, 0x28020ff);
+ stl_le_phys(as, 0xf1000278, 0xa31fc);
+ stl_le_phys(as, 0xf100f300, 0x11ff0400);
+ stl_le_phys(as, 0xf100f10c, 0x80000000);
+ stl_le_phys(as, 0xf100001c, 0x8000000);
+ pegasos2_pci_config_write(as, 0, PCI_COMMAND, 2, PCI_COMMAND_IO |
+ PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+ pegasos2_pci_config_write(as, 1, PCI_COMMAND, 2, PCI_COMMAND_IO |
+ PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 0) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x9);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 0) << 8) |
+ 0x50, 1, 0x2);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 1) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x109);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 1) << 8) |
+ PCI_CLASS_PROG, 1, 0xf);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 1) << 8) |
+ 0x40, 1, 0xb);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 1) << 8) |
+ 0x50, 4, 0x17171717);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 1) << 8) |
+ PCI_COMMAND, 2, 0x87);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 2) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x409);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 3) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x409);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 4) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x9);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 4) << 8) |
+ 0x48, 4, 0xf00);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 4) << 8) |
+ 0x40, 4, 0x558020);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 4) << 8) |
+ 0x90, 4, 0xd00);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 5) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x309);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 6) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x309);
+
+ /* Device tree and VOF set up */
+ vof_init(pm->vof, machine->ram_size, &error_fatal);
+ if (vof_claim(pm->vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE) == -1) {
+ error_report("Memory allocation for stack failed");
+ exit(1);
+ }
+ if (pm->kernel_size &&
+ vof_claim(pm->vof, pm->kernel_addr, pm->kernel_size, 0) == -1) {
+ error_report("Memory for kernel is in use");
+ exit(1);
+ }
+ fdt = build_fdt(machine, &sz);
+ /* FIXME: VOF assumes entry is same as load address */
+ d[0] = cpu_to_be64(pm->kernel_entry);
+ d[1] = cpu_to_be64(pm->kernel_size - (pm->kernel_entry - pm->kernel_addr));
+ qemu_fdt_setprop(fdt, "/chosen", "qemu,boot-kernel", d, sizeof(d));
+
+ qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
+ g_free(pm->fdt_blob);
+ pm->fdt_blob = fdt;
+
+ vof_build_dt(fdt, pm->vof);
+ vof_client_open_store(fdt, pm->vof, "/chosen", "stdout", "/failsafe");
+ pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine);
}
-static void pegasos2_machine(MachineClass *mc)
+enum pegasos2_rtas_tokens {
+ RTAS_RESTART_RTAS = 0,
+ RTAS_NVRAM_FETCH = 1,
+ RTAS_NVRAM_STORE = 2,
+ RTAS_GET_TIME_OF_DAY = 3,
+ RTAS_SET_TIME_OF_DAY = 4,
+ RTAS_EVENT_SCAN = 6,
+ RTAS_CHECK_EXCEPTION = 7,
+ RTAS_READ_PCI_CONFIG = 8,
+ RTAS_WRITE_PCI_CONFIG = 9,
+ RTAS_DISPLAY_CHARACTER = 10,
+ RTAS_SET_INDICATOR = 11,
+ RTAS_POWER_OFF = 17,
+ RTAS_SUSPEND = 18,
+ RTAS_HIBERNATE = 19,
+ RTAS_SYSTEM_REBOOT = 20,
+};
+
+static target_ulong pegasos2_rtas(PowerPCCPU *cpu, Pegasos2MachineState *pm,
+ target_ulong args_real)
{
+ AddressSpace *as = CPU(cpu)->as;
+ uint32_t token = ldl_be_phys(as, args_real);
+ uint32_t nargs = ldl_be_phys(as, args_real + 4);
+ uint32_t nrets = ldl_be_phys(as, args_real + 8);
+ uint32_t args = args_real + 12;
+ uint32_t rets = args_real + 12 + nargs * 4;
+
+ if (nrets < 1) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Too few return values in RTAS call\n");
+ return H_PARAMETER;
+ }
+ switch (token) {
+ case RTAS_READ_PCI_CONFIG:
+ {
+ uint32_t addr, len, val;
+
+ if (nargs != 2 || nrets != 2) {
+ stl_be_phys(as, rets, -1);
+ return H_PARAMETER;
+ }
+ addr = ldl_be_phys(as, args);
+ len = ldl_be_phys(as, args + 4);
+ val = pegasos2_pci_config_read(as, !(addr >> 24),
+ addr & 0x0fffffff, len);
+ stl_be_phys(as, rets, 0);
+ stl_be_phys(as, rets + 4, val);
+ return H_SUCCESS;
+ }
+ case RTAS_WRITE_PCI_CONFIG:
+ {
+ uint32_t addr, len, val;
+
+ if (nargs != 3 || nrets != 1) {
+ stl_be_phys(as, rets, -1);
+ return H_PARAMETER;
+ }
+ addr = ldl_be_phys(as, args);
+ len = ldl_be_phys(as, args + 4);
+ val = ldl_be_phys(as, args + 8);
+ pegasos2_pci_config_write(as, !(addr >> 24),
+ addr & 0x0fffffff, len, val);
+ stl_be_phys(as, rets, 0);
+ return H_SUCCESS;
+ }
+ case RTAS_DISPLAY_CHARACTER:
+ if (nargs != 1 || nrets != 1) {
+ stl_be_phys(as, rets, -1);
+ return H_PARAMETER;
+ }
+ qemu_log_mask(LOG_UNIMP, "%c", ldl_be_phys(as, args));
+ stl_be_phys(as, rets, 0);
+ return H_SUCCESS;
+ default:
+ qemu_log_mask(LOG_UNIMP, "Unknown RTAS token %u (args=%u, rets=%u)\n",
+ token, nargs, nrets);
+ stl_be_phys(as, rets, 0);
+ return H_SUCCESS;
+ }
+}
+
+static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
+{
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(vhyp);
+ CPUPPCState *env = &cpu->env;
+
+ /* The TCG path should also be holding the BQL at this point */
+ g_assert(qemu_mutex_iothread_locked());
+
+ if (msr_pr) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Hypercall made with MSR[PR]=1\n");
+ env->gpr[3] = H_PRIVILEGE;
+ } else if (env->gpr[3] == KVMPPC_H_RTAS) {
+ env->gpr[3] = pegasos2_rtas(cpu, pm, env->gpr[4]);
+ } else if (env->gpr[3] == KVMPPC_H_VOF_CLIENT) {
+ int ret = vof_client_call(MACHINE(pm), pm->vof, pm->fdt_blob,
+ env->gpr[4]);
+ env->gpr[3] = (ret ? H_PARAMETER : H_SUCCESS);
+ } else {
+ qemu_log_mask(LOG_GUEST_ERROR, "Unsupported hypercall " TARGET_FMT_lx
+ "\n", env->gpr[3]);
+ env->gpr[3] = -1;
+ }
+}
+
+static void vhyp_nop(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
+{
+}
+
+static target_ulong vhyp_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp)
+{
+ return POWERPC_CPU(current_cpu)->env.spr[SPR_SDR1];
+}
+
+static void pegasos2_machine_class_init(ObjectClass *oc, void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+
mc->desc = "Genesi/bPlan Pegasos II";
mc->init = pegasos2_init;
+ mc->reset = pegasos2_machine_reset;
mc->block_default_type = IF_IDE;
mc->default_boot_order = "cd";
mc->default_display = "std";
mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("7400_v2.9");
mc->default_ram_id = "pegasos2.ram";
mc->default_ram_size = 512 * MiB;
+
+ vhc->hypercall = pegasos2_hypercall;
+ vhc->cpu_exec_enter = vhyp_nop;
+ vhc->cpu_exec_exit = vhyp_nop;
+ vhc->encode_hpt_for_kvm_pr = vhyp_encode_hpt_for_kvm_pr;
+}
+
+static const TypeInfo pegasos2_machine_info = {
+ .name = TYPE_PEGASOS2_MACHINE,
+ .parent = TYPE_MACHINE,
+ .class_init = pegasos2_machine_class_init,
+ .instance_size = sizeof(Pegasos2MachineState),
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_PPC_VIRTUAL_HYPERVISOR },
+ { }
+ },
+};
+
+static void pegasos2_machine_register_types(void)
+{
+ type_register_static(&pegasos2_machine_info);
+}
+
+type_init(pegasos2_machine_register_types)
+
+/* FDT creation for passing to firmware */
+
+typedef struct {
+ void *fdt;
+ const char *path;
+} FDTInfo;
+
+/* We do everything in reverse order so it comes out right in the tree */
+
+static void dt_ide(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "spi");
}
-DEFINE_MACHINE("pegasos2", pegasos2_machine)
+static void dt_usb(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 0);
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 1);
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "usb");
+}
+
+static void dt_isa(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+ GString *name = g_string_sized_new(64);
+ uint32_t cells[3];
+
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 1);
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 2);
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "isa");
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "name", "isa");
+
+ /* additional devices */
+ g_string_printf(name, "%s/lpt@i3bc", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(7);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x3bc);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "lpt");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "lpt");
+
+ g_string_printf(name, "%s/fdc@i3f0", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(6);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x3f0);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "fdc");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "fdc");
+
+ g_string_printf(name, "%s/timer@i40", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x40);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "timer");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "timer");
+
+ g_string_printf(name, "%s/rtc@i70", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_string(fi->fdt, name->str, "compatible", "ds1385-rtc");
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(8);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x70);
+ cells[2] = cpu_to_be32(2);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "rtc");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "rtc");
+
+ g_string_printf(name, "%s/keyboard@i60", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ cells[0] = cpu_to_be32(1);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x60);
+ cells[2] = cpu_to_be32(5);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "keyboard");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "keyboard");
+
+ g_string_printf(name, "%s/8042@i60", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "#interrupt-cells", 2);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "#size-cells", 0);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "#address-cells", 1);
+ qemu_fdt_setprop_string(fi->fdt, name->str, "interrupt-controller", "");
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x60);
+ cells[2] = cpu_to_be32(5);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "8042");
+
+ g_string_printf(name, "%s/serial@i2f8", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(3);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x2f8);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "serial");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "serial");
+
+ g_string_free(name, TRUE);
+}
+
+static struct {
+ const char *id;
+ const char *name;
+ void (*dtf)(PCIBus *bus, PCIDevice *d, FDTInfo *fi);
+} device_map[] = {
+ { "pci11ab,6460", "host", NULL },
+ { "pci1106,8231", "isa", dt_isa },
+ { "pci1106,571", "ide", dt_ide },
+ { "pci1106,3044", "firewire", NULL },
+ { "pci1106,3038", "usb", dt_usb },
+ { "pci1106,8235", "other", NULL },
+ { "pci1106,3058", "sound", NULL },
+ { NULL, NULL }
+};
+
+static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque)
+{
+ FDTInfo *fi = opaque;
+ GString *node = g_string_new(NULL);
+ uint32_t cells[(PCI_NUM_REGIONS + 1) * 5];
+ int i, j;
+ const char *name = NULL;
+ g_autofree const gchar *pn = g_strdup_printf("pci%x,%x",
+ pci_get_word(&d->config[PCI_VENDOR_ID]),
+ pci_get_word(&d->config[PCI_DEVICE_ID]));
+
+ for (i = 0; device_map[i].id; i++) {
+ if (!strcmp(pn, device_map[i].id)) {
+ name = device_map[i].name;
+ break;
+ }
+ }
+ g_string_printf(node, "%s/%s@%x", fi->path, (name ?: pn),
+ PCI_SLOT(d->devfn));
+ if (PCI_FUNC(d->devfn)) {
+ g_string_append_printf(node, ",%x", PCI_FUNC(d->devfn));
+ }
+
+ qemu_fdt_add_subnode(fi->fdt, node->str);
+ if (device_map[i].dtf) {
+ FDTInfo cfi = { fi->fdt, node->str };
+ device_map[i].dtf(bus, d, &cfi);
+ }
+ cells[0] = cpu_to_be32(d->devfn << 8);
+ cells[1] = 0;
+ cells[2] = 0;
+ cells[3] = 0;
+ cells[4] = 0;
+ j = 5;
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ if (!d->io_regions[i].size) {
+ continue;
+ }
+ cells[j] = cpu_to_be32(d->devfn << 8 | (PCI_BASE_ADDRESS_0 + i * 4));
+ if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
+ cells[j] |= cpu_to_be32(1 << 24);
+ } else {
+ cells[j] |= cpu_to_be32(2 << 24);
+ if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_PREFETCH) {
+ cells[j] |= cpu_to_be32(4 << 28);
+ }
+ }
+ cells[j + 1] = 0;
+ cells[j + 2] = 0;
+ cells[j + 3] = cpu_to_be32(d->io_regions[i].size >> 32);
+ cells[j + 4] = cpu_to_be32(d->io_regions[i].size);
+ j += 5;
+ }
+ qemu_fdt_setprop(fi->fdt, node->str, "reg", cells, j * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, node->str, "name", name ?: pn);
+ if (pci_get_byte(&d->config[PCI_INTERRUPT_PIN])) {
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "interrupts",
+ pci_get_byte(&d->config[PCI_INTERRUPT_PIN]));
+ }
+ /* Pegasos2 firmware has subsystem-id and subsystem-vendor-id swapped */
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-vendor-id",
+ pci_get_word(&d->config[PCI_SUBSYSTEM_ID]));
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-id",
+ pci_get_word(&d->config[PCI_SUBSYSTEM_VENDOR_ID]));
+ cells[0] = pci_get_long(&d->config[PCI_CLASS_REVISION]);
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "class-code", cells[0] >> 8);
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "revision-id", cells[0] & 0xff);
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "device-id",
+ pci_get_word(&d->config[PCI_DEVICE_ID]));
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "vendor-id",
+ pci_get_word(&d->config[PCI_VENDOR_ID]));
+
+ g_string_free(node, TRUE);
+}
+
+static void *build_fdt(MachineState *machine, int *fdt_size)
+{
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
+ PowerPCCPU *cpu = pm->cpu;
+ PCIBus *pci_bus;
+ FDTInfo fi;
+ uint32_t cells[16];
+ void *fdt = create_device_tree(fdt_size);
+
+ fi.fdt = fdt;
+
+ /* root node */
+ qemu_fdt_setprop_string(fdt, "/", "CODEGEN,description",
+ "Pegasos CHRP PowerPC System");
+ qemu_fdt_setprop_string(fdt, "/", "CODEGEN,board", "Pegasos2");
+ qemu_fdt_setprop_string(fdt, "/", "CODEGEN,vendor", "bplan GmbH");
+ qemu_fdt_setprop_string(fdt, "/", "revision", "2B");
+ qemu_fdt_setprop_string(fdt, "/", "model", "Pegasos2");
+ qemu_fdt_setprop_string(fdt, "/", "device_type", "chrp");
+ qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 1);
+ qemu_fdt_setprop_string(fdt, "/", "name", "bplan,Pegasos2");
+
+ /* pci@c0000000 */
+ qemu_fdt_add_subnode(fdt, "/pci@c0000000");
+ cells[0] = 0;
+ cells[1] = 0;
+ qemu_fdt_setprop(fdt, "/pci@c0000000", "bus-range",
+ cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "pci-bridge-number", 1);
+ cells[0] = cpu_to_be32(PCI0_MEM_BASE);
+ cells[1] = cpu_to_be32(PCI0_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@c0000000", "reg", cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(0x01000000);
+ cells[1] = 0;
+ cells[2] = 0;
+ cells[3] = cpu_to_be32(PCI0_IO_BASE);
+ cells[4] = 0;
+ cells[5] = cpu_to_be32(PCI0_IO_SIZE);
+ cells[6] = cpu_to_be32(0x02000000);
+ cells[7] = 0;
+ cells[8] = cpu_to_be32(PCI0_MEM_BASE);
+ cells[9] = cpu_to_be32(PCI0_MEM_BASE);
+ cells[10] = 0;
+ cells[11] = cpu_to_be32(PCI0_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@c0000000", "ranges",
+ cells, 12 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#size-cells", 2);
+ qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#address-cells", 3);
+ qemu_fdt_setprop_string(fdt, "/pci@c0000000", "device_type", "pci");
+ qemu_fdt_setprop_string(fdt, "/pci@c0000000", "name", "pci");
+
+ fi.path = "/pci@c0000000";
+ pci_bus = mv64361_get_pci_bus(pm->mv, 0);
+ pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi);
+
+ /* pci@80000000 */
+ qemu_fdt_add_subnode(fdt, "/pci@80000000");
+ cells[0] = 0;
+ cells[1] = 0;
+ qemu_fdt_setprop(fdt, "/pci@80000000", "bus-range",
+ cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "pci-bridge-number", 0);
+ cells[0] = cpu_to_be32(PCI1_MEM_BASE);
+ cells[1] = cpu_to_be32(PCI1_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@80000000", "reg", cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "8259-interrupt-acknowledge",
+ 0xf1000cb4);
+ cells[0] = cpu_to_be32(0x01000000);
+ cells[1] = 0;
+ cells[2] = 0;
+ cells[3] = cpu_to_be32(PCI1_IO_BASE);
+ cells[4] = 0;
+ cells[5] = cpu_to_be32(PCI1_IO_SIZE);
+ cells[6] = cpu_to_be32(0x02000000);
+ cells[7] = 0;
+ cells[8] = cpu_to_be32(PCI1_MEM_BASE);
+ cells[9] = cpu_to_be32(PCI1_MEM_BASE);
+ cells[10] = 0;
+ cells[11] = cpu_to_be32(PCI1_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@80000000", "ranges",
+ cells, 12 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#size-cells", 2);
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#address-cells", 3);
+ qemu_fdt_setprop_string(fdt, "/pci@80000000", "device_type", "pci");
+ qemu_fdt_setprop_string(fdt, "/pci@80000000", "name", "pci");
+
+ fi.path = "/pci@80000000";
+ pci_bus = mv64361_get_pci_bus(pm->mv, 1);
+ pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi);
+
+ qemu_fdt_add_subnode(fdt, "/failsafe");
+ qemu_fdt_setprop_string(fdt, "/failsafe", "device_type", "serial");
+ qemu_fdt_setprop_string(fdt, "/failsafe", "name", "failsafe");
+
+ qemu_fdt_add_subnode(fdt, "/rtas");
+ qemu_fdt_setprop_cell(fdt, "/rtas", "system-reboot", RTAS_SYSTEM_REBOOT);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "hibernate", RTAS_HIBERNATE);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "suspend", RTAS_SUSPEND);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "power-off", RTAS_POWER_OFF);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "set-indicator", RTAS_SET_INDICATOR);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "display-character",
+ RTAS_DISPLAY_CHARACTER);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "write-pci-config",
+ RTAS_WRITE_PCI_CONFIG);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "read-pci-config",
+ RTAS_READ_PCI_CONFIG);
+ /* Pegasos2 firmware misspells check-exception and guests use that */
+ qemu_fdt_setprop_cell(fdt, "/rtas", "check-execption",
+ RTAS_CHECK_EXCEPTION);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "event-scan", RTAS_EVENT_SCAN);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "set-time-of-day",
+ RTAS_SET_TIME_OF_DAY);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "get-time-of-day",
+ RTAS_GET_TIME_OF_DAY);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-store", RTAS_NVRAM_STORE);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-fetch", RTAS_NVRAM_FETCH);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "restart-rtas", RTAS_RESTART_RTAS);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-error-log-max", 0);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-event-scan-rate", 0);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-display-device", 0);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", 20);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-version", 1);
+
+ /* cpus */
+ qemu_fdt_add_subnode(fdt, "/cpus");
+ qemu_fdt_setprop_cell(fdt, "/cpus", "#cpus", 1);
+ qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 1);
+ qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0);
+ qemu_fdt_setprop_string(fdt, "/cpus", "name", "cpus");
+
+ /* FIXME Get CPU name from CPU object */
+ const char *cp = "/cpus/PowerPC,G4";
+ qemu_fdt_add_subnode(fdt, cp);
+ qemu_fdt_setprop_cell(fdt, cp, "l2cr", 0);
+ qemu_fdt_setprop_cell(fdt, cp, "d-cache-size", 0x8000);
+ qemu_fdt_setprop_cell(fdt, cp, "d-cache-block-size",
+ cpu->env.dcache_line_size);
+ qemu_fdt_setprop_cell(fdt, cp, "d-cache-line-size",
+ cpu->env.dcache_line_size);
+ qemu_fdt_setprop_cell(fdt, cp, "i-cache-size", 0x8000);
+ qemu_fdt_setprop_cell(fdt, cp, "i-cache-block-size",
+ cpu->env.icache_line_size);
+ qemu_fdt_setprop_cell(fdt, cp, "i-cache-line-size",
+ cpu->env.icache_line_size);
+ if (cpu->env.id_tlbs) {
+ qemu_fdt_setprop_cell(fdt, cp, "i-tlb-sets", cpu->env.nb_ways);
+ qemu_fdt_setprop_cell(fdt, cp, "i-tlb-size", cpu->env.tlb_per_way);
+ qemu_fdt_setprop_cell(fdt, cp, "d-tlb-sets", cpu->env.nb_ways);
+ qemu_fdt_setprop_cell(fdt, cp, "d-tlb-size", cpu->env.tlb_per_way);
+ qemu_fdt_setprop_string(fdt, cp, "tlb-split", "");
+ }
+ qemu_fdt_setprop_cell(fdt, cp, "tlb-sets", cpu->env.nb_ways);
+ qemu_fdt_setprop_cell(fdt, cp, "tlb-size", cpu->env.nb_tlb);
+ qemu_fdt_setprop_string(fdt, cp, "state", "running");
+ if (cpu->env.insns_flags & PPC_ALTIVEC) {
+ qemu_fdt_setprop_string(fdt, cp, "altivec", "");
+ qemu_fdt_setprop_string(fdt, cp, "data-streams", "");
+ }
+ /*
+ * FIXME What flags do data-streams, external-control and
+ * performance-monitor depend on?
+ */
+ qemu_fdt_setprop_string(fdt, cp, "external-control", "");
+ if (cpu->env.insns_flags & PPC_FLOAT_FSQRT) {
+ qemu_fdt_setprop_string(fdt, cp, "general-purpose", "");
+ }
+ qemu_fdt_setprop_string(fdt, cp, "performance-monitor", "");
+ if (cpu->env.insns_flags & PPC_FLOAT_FRES) {
+ qemu_fdt_setprop_string(fdt, cp, "graphics", "");
+ }
+ qemu_fdt_setprop_cell(fdt, cp, "reservation-granule-size", 4);
+ qemu_fdt_setprop_cell(fdt, cp, "timebase-frequency",
+ cpu->env.tb_env->tb_freq);
+ qemu_fdt_setprop_cell(fdt, cp, "bus-frequency", BUS_FREQ_HZ);
+ qemu_fdt_setprop_cell(fdt, cp, "clock-frequency", BUS_FREQ_HZ * 7.5);
+ qemu_fdt_setprop_cell(fdt, cp, "cpu-version", cpu->env.spr[SPR_PVR]);
+ cells[0] = 0;
+ cells[1] = 0;
+ qemu_fdt_setprop(fdt, cp, "reg", cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fdt, cp, "device_type", "cpu");
+ qemu_fdt_setprop_string(fdt, cp, "name", strrchr(cp, '/') + 1);
+
+ /* memory */
+ qemu_fdt_add_subnode(fdt, "/memory@0");
+ cells[0] = 0;
+ cells[1] = cpu_to_be32(machine->ram_size);
+ qemu_fdt_setprop(fdt, "/memory@0", "reg", cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fdt, "/memory@0", "device_type", "memory");
+ qemu_fdt_setprop_string(fdt, "/memory@0", "name", "memory");
+
+ qemu_fdt_add_subnode(fdt, "/chosen");
+ qemu_fdt_setprop_string(fdt, "/chosen", "bootargs",
+ machine->kernel_cmdline ?: "");
+ qemu_fdt_setprop_string(fdt, "/chosen", "name", "chosen");
+
+ qemu_fdt_add_subnode(fdt, "/openprom");
+ qemu_fdt_setprop_string(fdt, "/openprom", "model", "Pegasos2,1.1");
+
+ return fdt;
+}
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4dd90b75cc..a007be471e 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -101,6 +101,7 @@
#define FDT_MAX_ADDR 0x80000000 /* FDT must stay below that */
#define FW_MAX_SIZE 0x400000
#define FW_FILE_NAME "slof.bin"
+#define FW_FILE_NAME_VOF "vof.bin"
#define FW_OVERHEAD 0x2800000
#define KERNEL_LOAD_ADDR FW_MAX_SIZE
@@ -880,6 +881,10 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
add_str(hypertas, "hcall-copy");
add_str(hypertas, "hcall-debug");
add_str(hypertas, "hcall-vphn");
+ if (spapr_get_cap(spapr, SPAPR_CAP_RPT_INVALIDATE) == SPAPR_CAP_ON) {
+ add_str(hypertas, "hcall-rpt-invalidate");
+ }
+
add_str(qemu_hypertas, "hcall-memop1");
if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
@@ -919,9 +924,13 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
*
* The extra 8 bytes is required because Linux's FWNMI error log check
* is off-by-one.
+ *
+ * RTAS_MIN_SIZE is required for the RTAS blob itself.
*/
- _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_ERROR_LOG_MAX +
- ms->smp.max_cpus * sizeof(uint64_t)*2 + sizeof(uint64_t)));
+ _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_MIN_SIZE +
+ RTAS_ERROR_LOG_MAX +
+ ms->smp.max_cpus * sizeof(uint64_t) * 2 +
+ sizeof(uint64_t)));
_FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
RTAS_ERROR_LOG_MAX));
_FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",
@@ -1639,22 +1648,29 @@ static void spapr_machine_reset(MachineState *machine)
fdt_addr = MIN(spapr->rma_size, FDT_MAX_ADDR) - FDT_MAX_SIZE;
fdt = spapr_build_fdt(spapr, true, FDT_MAX_SIZE);
+ if (spapr->vof) {
+ spapr_vof_reset(spapr, fdt, &error_fatal);
+ /*
+ * Do not pack the FDT as the client may change properties.
+ * VOF client does not expect the FDT so we do not load it to the VM.
+ */
+ } else {
+ rc = fdt_pack(fdt);
+ /* Should only fail if we've built a corrupted tree */
+ assert(rc == 0);
- rc = fdt_pack(fdt);
-
- /* Should only fail if we've built a corrupted tree */
- assert(rc == 0);
-
- /* Load the fdt */
+ spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT,
+ 0, fdt_addr, 0);
+ cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
+ }
qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
- cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
+
g_free(spapr->fdt_blob);
spapr->fdt_size = fdt_totalsize(fdt);
spapr->fdt_initial_size = spapr->fdt_size;
spapr->fdt_blob = fdt;
/* Set up the entry state */
- spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, 0, fdt_addr, 0);
first_ppc_cpu->env.gpr[5] = 0;
spapr->fwnmi_system_reset_addr = -1;
@@ -2018,6 +2034,7 @@ static const VMStateDescription vmstate_spapr = {
&vmstate_spapr_cap_ccf_assist,
&vmstate_spapr_cap_fwnmi,
&vmstate_spapr_fwnmi,
+ &vmstate_spapr_cap_rpt_invalidate,
NULL
}
};
@@ -2657,7 +2674,8 @@ static void spapr_machine_init(MachineState *machine)
SpaprMachineState *spapr = SPAPR_MACHINE(machine);
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
MachineClass *mc = MACHINE_GET_CLASS(machine);
- const char *bios_name = machine->firmware ?: FW_FILE_NAME;
+ const char *bios_default = spapr->vof ? FW_FILE_NAME_VOF : FW_FILE_NAME;
+ const char *bios_name = machine->firmware ?: bios_default;
const char *kernel_filename = machine->kernel_filename;
const char *initrd_filename = machine->initrd_filename;
PCIHostState *phb;
@@ -3014,6 +3032,10 @@ static void spapr_machine_init(MachineState *machine)
}
qemu_cond_init(&spapr->fwnmi_machine_check_interlock_cond);
+ if (spapr->vof) {
+ spapr->vof->fw_size = fw_size; /* for claim() on itself */
+ spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client);
+ }
}
#define DEFAULT_KVM_TYPE "auto"
@@ -3204,6 +3226,28 @@ static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
}
}
+/* QOM getter for "x-vof": true while a VOF state is attached to the machine */
+static bool spapr_get_vof(Object *obj, Error **errp)
+{
+    return SPAPR_MACHINE(obj)->vof != NULL;
+}
+
+/*
+ * QOM setter for "x-vof": any existing VOF state is cleaned up and freed
+ * first; when @value is true a fresh zero-filled state is allocated.
+ */
+static void spapr_set_vof(Object *obj, bool value, Error **errp)
+{
+    SpaprMachineState *s = SPAPR_MACHINE(obj);
+
+    if (s->vof != NULL) {
+        vof_cleanup(s->vof);
+        g_free(s->vof);
+        s->vof = NULL;
+    }
+
+    if (value) {
+        s->vof = g_malloc0(sizeof(*s->vof));
+    }
+}
+
static char *spapr_get_ic_mode(Object *obj, Error **errp)
{
SpaprMachineState *spapr = SPAPR_MACHINE(obj);
@@ -3329,6 +3373,11 @@ static void spapr_instance_init(Object *obj)
stringify(KERNEL_LOAD_ADDR)
" for -kernel is the default");
spapr->kernel_addr = KERNEL_LOAD_ADDR;
+
+ object_property_add_bool(obj, "x-vof", spapr_get_vof, spapr_set_vof);
+ object_property_set_description(obj, "x-vof",
+ "Enable Virtual Open Firmware (experimental)");
+
/* The machine class defines the default interrupt controller mode */
spapr->irq = smc->irq;
object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
@@ -4492,6 +4541,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc);
+ VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc);
mc->desc = "pSeries Logical Partition (PAPR compliant)";
mc->ignore_boot_device_suffixes = true;
@@ -4573,6 +4623,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
+ smc->default_caps.caps[SPAPR_CAP_RPT_INVALIDATE] = SPAPR_CAP_OFF;
spapr_caps_add_properties(smc);
smc->irq = &spapr_irq_dual;
smc->dr_phb_enabled = true;
@@ -4580,6 +4631,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
smc->smp_threads_vsmt = true;
smc->nr_xirqs = SPAPR_NR_XIRQS;
xfc->match_nvt = spapr_match_nvt;
+ vmc->client_architecture_support = spapr_vof_client_architecture_support;
+ vmc->quiesce = spapr_vof_quiesce;
+ vmc->setprop = spapr_vof_setprop;
}
static const TypeInfo spapr_machine_info = {
@@ -4599,6 +4653,7 @@ static const TypeInfo spapr_machine_info = {
{ TYPE_XICS_FABRIC },
{ TYPE_INTERRUPT_STATS_PROVIDER },
{ TYPE_XIVE_FABRIC },
+ { TYPE_VOF_MACHINE_IF },
{ }
},
};
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index d0c419b392..ed7c077a0d 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -582,6 +582,37 @@ static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
}
}
+/*
+ * Applies the "rpt-invalidate" capability. Nothing is done when the
+ * capability is off. Under TCG enabling it is an error; under KVM it needs
+ * the Radix MMU and KVM support for H_RPT_INVALIDATE, in which case the
+ * hcall is enabled in KVM.
+ */
+static void cap_rpt_invalidate_apply(SpaprMachineState *spapr,
+                                     uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+
+    if (!val) {
+        /* capability disabled by default */
+        return;
+    }
+
+    if (tcg_enabled()) {
+        error_setg(errp, "No H_RPT_INVALIDATE support in TCG");
+        error_append_hint(errp,
+                          "Try appending -machine cap-rpt-invalidate=off\n");
+        return;
+    }
+
+    if (!kvm_enabled()) {
+        return;
+    }
+
+    if (!kvmppc_has_cap_mmu_radix()) {
+        error_setg(errp, "H_RPT_INVALIDATE only supported on Radix");
+        return;
+    }
+
+    if (!kvmppc_has_cap_rpt_invalidate()) {
+        error_setg(errp,
+                   "KVM implementation does not support H_RPT_INVALIDATE");
+        error_append_hint(errp,
+                          "Try appending -machine cap-rpt-invalidate=off\n");
+        return;
+    }
+
+    kvmppc_enable_h_rpt_invalidate();
+}
+
+
SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
[SPAPR_CAP_HTM] = {
.name = "htm",
@@ -690,6 +721,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
.type = "bool",
.apply = cap_fwnmi_apply,
},
+ [SPAPR_CAP_RPT_INVALIDATE] = {
+ .name = "rpt-invalidate",
+ .description = "Allow H_RPT_INVALIDATE",
+ .index = SPAPR_CAP_RPT_INVALIDATE,
+ .get = spapr_cap_get_bool,
+ .set = spapr_cap_set_bool,
+ .type = "bool",
+ .apply = cap_rpt_invalidate_apply,
+ },
};
static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -830,6 +870,7 @@ SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
+SPAPR_CAP_MIG_STATE(rpt_invalidate, SPAPR_CAP_RPT_INVALIDATE);
void spapr_caps_init(SpaprMachineState *spapr)
{
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index f25014afda..0e9a5b2e40 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1233,8 +1233,7 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu,
spapr_setup_hpt(spapr);
}
- fdt = spapr_build_fdt(spapr, false, fdt_bufsize);
-
+ fdt = spapr_build_fdt(spapr, spapr->vof != NULL, fdt_bufsize);
g_free(spapr->fdt_blob);
spapr->fdt_size = fdt_totalsize(fdt);
spapr->fdt_initial_size = spapr->fdt_size;
@@ -1277,6 +1276,25 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
return ret;
}
+/*
+ * VOF entry point for client-architecture-support: runs the common CAS
+ * handler with an FDT_MAX_SIZE buffer and then finalizes the rebuilt FDT
+ * for the VOF client. Returns whatever the CAS handler returned.
+ */
+target_ulong spapr_vof_client_architecture_support(MachineState *ms,
+                                                   CPUState *cs,
+                                                   target_ulong ovec_addr)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(ms);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    target_ulong rc;
+
+    rc = do_client_architecture_support(cpu, spapr, ovec_addr, FDT_MAX_SIZE);
+
+    /*
+     * This adds stdout and generates phandles for boottime and CAS FDTs.
+     * It is alright to update the FDT here as do_client_architecture_support()
+     * does not pack it.
+     */
+    spapr_vof_client_dt_finalize(spapr, spapr->fdt_blob);
+
+    return rc;
+}
+
+
static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
SpaprMachineState *spapr,
target_ulong opcode,
@@ -1299,6 +1317,8 @@ static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR;
break;
case SPAPR_CAP_FIXED:
+ behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY;
+ behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS;
break;
default: /* broken */
assert(safe_cache == SPAPR_CAP_BROKEN);
diff --git a/hw/ppc/spapr_vof.c b/hw/ppc/spapr_vof.c
new file mode 100644
index 0000000000..40ce8fe003
--- /dev/null
+++ b/hw/ppc/spapr_vof.c
@@ -0,0 +1,167 @@
+/*
+ * SPAPR machine hooks to Virtual Open Firmware,
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/vof.h"
+#include "sysemu/sysemu.h"
+#include "qom/qom-qobject.h"
+#include "trace.h"
+
+/*
+ * Hypercall handler that forwards an OF client interface call to VOF.
+ * _args[0] is the guest address of the call's argument block.
+ * Returns H_SUCCESS, or H_PARAMETER if vof_client_call() reported a failure.
+ */
+target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                target_ulong opcode, target_ulong *_args)
+{
+    int rc;
+
+    rc = vof_client_call(MACHINE(spapr), spapr->vof, spapr->fdt_blob,
+                         ppc64_phys_to_real(_args[0]));
+
+    return rc ? H_PARAMETER : H_SUCCESS;
+}
+
+/*
+ * Finishes @fdt for the VOF client: calls vof_build_dt(), re-applies the
+ * client-provided "bootargs" (if any) and opens an instance of the default
+ * console, storing its ihandle in /chosen/stdout.
+ */
+void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt)
+{
+    /*
+     * spapr_vio_stdout_path() hands back a heap-allocated string which
+     * vof_do_open() duplicates, so release our copy on return.
+     */
+    g_autofree char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
+
+    vof_build_dt(fdt, spapr->vof);
+
+    if (spapr->vof->bootargs) {
+        int chosen;
+
+        _FDT(chosen = fdt_path_offset(fdt, "/chosen"));
+        /*
+         * If the client did not change "bootargs", spapr_dt_chosen() must have
+         * stored machine->kernel_cmdline in it before getting here.
+         */
+        _FDT(fdt_setprop_string(fdt, chosen, "bootargs", spapr->vof->bootargs));
+    }
+
+    /*
+     * SLOF-less setup requires an open instance of stdout for early
+     * kernel printk. By now all phandles are settled so we can open
+     * the default serial console.
+     */
+    if (stdout_path) {
+        _FDT(vof_client_open_store(fdt, spapr->vof, "/chosen", "stdout",
+                                   stdout_path));
+    }
+}
+
+/*
+ * Machine reset hook for the VOF case: re-initializes VOF, claims memory for
+ * the client stack, the kernel and the initramdisk, finalizes @fdt and sets
+ * up the first CPU to enter the firmware in 32bit big-endian mode.
+ * Failures are reported through @errp.
+ */
+void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp)
+{
+    PowerPCCPU *cpu0 = POWERPC_CPU(first_cpu);
+    Vof *vof = spapr->vof;
+    target_ulong sp;
+
+    vof_init(vof, spapr->rma_size, errp);
+
+    /* A non-zero alignment lets VOF choose where the stack goes */
+    sp = vof_claim(vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE);
+    if (sp == -1) {
+        error_setg(errp, "Memory allocation for stack failed");
+        return;
+    }
+    /* Stack grows downwards plus reserve space for the minimum stack frame */
+    sp += VOF_STACK_SIZE - 0x20;
+
+    if (spapr->kernel_size) {
+        if (vof_claim(vof, spapr->kernel_addr, spapr->kernel_size, 0) == -1) {
+            error_setg(errp, "Memory for kernel is in use");
+            return;
+        }
+    }
+
+    if (spapr->initrd_size) {
+        if (vof_claim(vof, spapr->initrd_base, spapr->initrd_size, 0) == -1) {
+            error_setg(errp, "Memory for initramdisk is in use");
+            return;
+        }
+    }
+
+    spapr_vof_client_dt_finalize(spapr, fdt);
+
+    spapr_cpu_set_entry_state(cpu0, SPAPR_ENTRY_POINT, sp,
+                              spapr->initrd_base, spapr->initrd_size);
+    /* VOF is 32bit BE so enforce MSR here */
+    cpu0->env.msr &= ~((1ULL << MSR_SF) | (1ULL << MSR_LE));
+
+    /*
+     * At this point the expected allocation map is:
+     *
+     * 0..c38 - the initial firmware
+     * 8000..10000 - stack
+     * 400000.. - kernel
+     * 3ea0000.. - initramdisk
+     *
+     * We skip writing FDT as nothing expects it; OF client interface is
+     * going to be used for reading the device tree.
+     */
+}
+
+/*
+ * "quiesce" hook: records the current total size of the FDT blob as both
+ * the current and the initial FDT size.
+ */
+void spapr_vof_quiesce(MachineState *ms)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(ms);
+
+    spapr->fdt_initial_size = spapr->fdt_size =
+        fdt_totalsize(spapr->fdt_blob);
+}
+
+/*
+ * Machine hook called before VOF stores a property set by the client.
+ * Mirrors the values QEMU tracks itself (/chosen "bootargs",
+ * "linux,initrd-start", "linux,initrd-end") into the machine state.
+ * Returns false only when an initrd property has an unsupported length;
+ * every other property is allowed to be stored.
+ */
+bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname,
+                       void *val, int vallen)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(ms);
+    const bool in_chosen = strcmp(path, "/chosen") == 0;
+
+    /*
+     * We only allow changing properties which we know how to update in QEMU
+     * OR
+     * the ones which we know that they need to survive during "quiesce".
+     */
+
+    if (strcmp(path, "/rtas") == 0 &&
+        (strcmp(propname, "linux,rtas-base") == 0 ||
+         strcmp(propname, "linux,rtas-entry") == 0)) {
+        /* These need to survive quiesce so let them store in the FDT */
+        return true;
+    }
+
+    if (in_chosen && strcmp(propname, "bootargs") == 0) {
+        Vof *vof = spapr->vof;
+
+        g_free(vof->bootargs);
+        vof->bootargs = g_strndup(val, vallen);
+        return true;
+    }
+
+    if (in_chosen && strcmp(propname, "linux,initrd-start") == 0) {
+        switch (vallen) {
+        case sizeof(uint32_t):
+            spapr->initrd_base = ldl_be_p(val);
+            return true;
+        case sizeof(uint64_t):
+            spapr->initrd_base = ldq_be_p(val);
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    if (in_chosen && strcmp(propname, "linux,initrd-end") == 0) {
+        /* The size is derived from the previously recorded initrd base */
+        switch (vallen) {
+        case sizeof(uint32_t):
+            spapr->initrd_size = ldl_be_p(val) - spapr->initrd_base;
+            return true;
+        case sizeof(uint64_t):
+            spapr->initrd_size = ldq_be_p(val) - spapr->initrd_base;
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    return true;
+}
diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
index 0ba3e40353..6e90a01072 100644
--- a/hw/ppc/trace-events
+++ b/hw/ppc/trace-events
@@ -71,6 +71,30 @@ spapr_rtas_ibm_configure_connector_invalid(uint32_t index) "DRC index: 0x%"PRIx3
spapr_vio_h_reg_crq(uint64_t reg, uint64_t queue_addr, uint64_t queue_len) "CRQ for dev 0x%" PRIx64 " registered at 0x%" PRIx64 "/0x%" PRIx64
spapr_vio_free_crq(uint32_t reg) "CRQ for dev 0x%" PRIx32 " freed"
+# vof.c
+vof_error_str_truncated(const char *s, int len) "%s truncated to %d"
+vof_error_param(const char *method, int nargscheck, int nretcheck, int nargs, int nret) "%s takes/returns %d/%d, not %d/%d"
+vof_error_unknown_service(const char *service, int nargs, int nret) "\"%s\" args=%d rets=%d"
+vof_error_unknown_method(const char *method) "\"%s\""
+vof_error_unknown_ihandle_close(uint32_t ih) "ih=0x%x"
+vof_error_unknown_path(const char *path) "\"%s\""
+vof_error_write(uint32_t ih) "ih=0x%x"
+vof_finddevice(const char *path, uint32_t ph) "\"%s\" => ph=0x%x"
+vof_claim(uint32_t virt, uint32_t size, uint32_t align, uint32_t ret) "virt=0x%x size=0x%x align=0x%x => 0x%x"
+vof_release(uint32_t virt, uint32_t size, uint32_t ret) "virt=0x%x size=0x%x => 0x%x"
+vof_method(uint32_t ihandle, const char *method, uint32_t param, uint32_t ret, uint32_t ret2) "ih=0x%x \"%s\"(0x%x) => 0x%x 0x%x"
+vof_getprop(uint32_t ph, const char *prop, uint32_t ret, const char *val) "ph=0x%x \"%s\" => len=%d [%s]"
+vof_getproplen(uint32_t ph, const char *prop, uint32_t ret) "ph=0x%x \"%s\" => len=%d"
+vof_setprop(uint32_t ph, const char *prop, const char *val, uint32_t vallen, uint32_t ret) "ph=0x%x \"%s\" [%s] len=%d => ret=%d"
+vof_open(const char *path, uint32_t ph, uint32_t ih) "%s ph=0x%x => ih=0x%x"
+vof_interpret(const char *cmd, uint32_t param1, uint32_t param2, uint32_t ret, uint32_t ret2) "[%s] 0x%x 0x%x => 0x%x 0x%x"
+vof_package_to_path(uint32_t ph, const char *tmp, uint32_t ret) "ph=0x%x => %s len=%d"
+vof_instance_to_path(uint32_t ih, uint32_t ph, const char *tmp, uint32_t ret) "ih=0x%x ph=0x%x => %s len=%d"
+vof_instance_to_package(uint32_t ih, uint32_t ph) "ih=0x%x => ph=0x%x"
+vof_write(uint32_t ih, unsigned cb, const char *msg) "ih=0x%x [%u] \"%s\""
+vof_avail(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64
+vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64
+
# ppc.c
ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"
diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c
new file mode 100644
index 0000000000..81f6596215
--- /dev/null
+++ b/hw/ppc/vof.c
@@ -0,0 +1,1053 @@
+/*
+ * QEMU PowerPC Virtual Open Firmware.
+ *
+ * This implements client interface from OpenFirmware IEEE1275 on the QEMU
+ * side to leave only a very basic firmware in the VM.
+ *
+ * Copyright (c) 2021 IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/range.h"
+#include "qemu/units.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "exec/ram_addr.h"
+#include "exec/address-spaces.h"
+#include "hw/ppc/vof.h"
+#include "hw/ppc/fdt.h"
+#include "sysemu/runstate.h"
+#include "qom/qom-qobject.h"
+#include "trace.h"
+
+#include <libfdt.h>
+
+/*
+ * OF 1275 "nextprop" description suggests it is 32 bytes max but
+ * LoPAPR defines "ibm,query-interrupt-source-number" which is 33 chars long.
+ */
+#define OF_PROPNAME_LEN_MAX 64
+
+#define VOF_MAX_PATH 256
+#define VOF_MAX_SETPROPLEN 2048
+#define VOF_MAX_METHODLEN 256
+#define VOF_MAX_FORTHCODE 256
+#define VOF_VTY_BUF_SIZE 256
+
+/* One claimed memory range, kept in the Vof "claimed" array */
+typedef struct {
+ uint64_t start; /* first address of the range */
+ uint64_t size; /* length of the range in bytes */
+} OfClaimed;
+
+/* An open instance, stored in the Vof instance table keyed by its ihandle */
+typedef struct {
+ char *path; /* the path used to open the instance */
+ uint32_t phandle; /* phandle of the node the instance was opened on */
+} OfInstance;
+
+/*
+ * Reads @size bytes of guest memory at @pa into @buf and requires a NUL
+ * terminator to be among them.
+ * Returns 0 on success; -1 if the read fails or no terminator is found, in
+ * which case @buf is terminated at its last byte and the truncation traced.
+ */
+static int readstr(hwaddr pa, char *buf, int size)
+{
+    if (VOF_MEM_READ(pa, buf, size) != MEMTX_OK) {
+        return -1;
+    }
+
+    if (memchr(buf, '\0', size)) {
+        return 0;
+    }
+
+    buf[size - 1] = '\0';
+    trace_vof_error_str_truncated(buf, size);
+    return -1;
+}
+
+/*
+ * Tells whether service name @s is @s1 and was called with the expected
+ * number of arguments and return values. A zero @nargscheck or @nretcheck
+ * disables the respective check. A name match with wrong counts is traced
+ * and reported as no match.
+ */
+static bool cmpservice(const char *s, unsigned nargs, unsigned nret,
+                       const char *s1, unsigned nargscheck, unsigned nretcheck)
+{
+    const bool nargs_ok = !nargscheck || nargs == nargscheck;
+    const bool nret_ok = !nretcheck || nret == nretcheck;
+
+    if (strcmp(s, s1) != 0) {
+        return false;
+    }
+
+    if (nargs_ok && nret_ok) {
+        return true;
+    }
+
+    trace_vof_error_param(s, nargscheck, nretcheck, nargs, nret);
+    return false;
+}
+
+/*
+ * Formats property value @prop of @len bytes into @tval (of @tlen bytes) for
+ * tracing. A value consisting of printable ASCII with a single trailing NUL
+ * is copied as a string; anything else is dumped as hex in groups of 4 bytes,
+ * ending with "..." when the output buffer runs out.
+ * Nothing is written when @len is 0.
+ */
+static void prop_format(char *tval, int tlen, const void *prop, int len)
+{
+    int i;
+    const unsigned char *c;
+    char *t;
+    const char bin[] = "...";
+
+    for (i = 0, c = prop; i < len; ++i, ++c) {
+        if (*c == '\0' && i == len - 1) {
+            /*
+             * strncpy() leaves the buffer unterminated when it truncates,
+             * so terminate explicitly rather than rely on the caller
+             * having zeroed @tval.
+             */
+            strncpy(tval, prop, tlen - 1);
+            tval[tlen - 1] = '\0';
+            return;
+        }
+        if (*c < 0x20 || *c >= 0x80) {
+            break;
+        }
+    }
+
+    for (i = 0, c = prop, t = tval; i < len; ++i, ++c) {
+        /* Keep room for a separator, one hex byte and the "..." marker */
+        if (t >= tval + tlen - sizeof(bin) - 1 - 2 - 1) {
+            strcpy(t, bin);
+            return;
+        }
+        if (i && i % 4 == 0 && i != len - 1) {
+            strcat(t, " ");
+            ++t;
+        }
+        t += sprintf(t, "%02X", *c & 0xFF);
+    }
+}
+
+/*
+ * Stores the full path of the node at @offset in @buf (of @len bytes).
+ * Returns the path length including the terminating NUL, or the negative
+ * libfdt error from fdt_get_path().
+ */
+static int get_path(const void *fdt, int offset, char *buf, int len)
+{
+    int rc = fdt_get_path(fdt, offset, buf, len - 1);
+
+    if (rc >= 0) {
+        buf[len - 1] = '\0';
+        rc = strlen(buf) + 1;
+    }
+
+    return rc;
+}
+
+/*
+ * Resolves phandle @ph to a node and stores its full path in @buf.
+ * Returns what get_path() returns, or the negative libfdt error if the
+ * phandle lookup fails.
+ */
+static int phandle_to_path(const void *fdt, uint32_t ph, char *buf, int len)
+{
+    int node = fdt_node_offset_by_phandle(fdt, ph);
+
+    return node < 0 ? node : get_path(fdt, node, buf, len);
+}
+
+/*
+ * Looks up @path in @fdt after lower-casing the unit address (the part
+ * between '@' and the next '/') of every path component.
+ * Returns the node offset or a negative libfdt error.
+ */
+static int path_offset(const void *fdt, const char *path)
+{
+    g_autofree char *p = NULL;
+    char *at;
+
+    /*
+     * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#HDR16
+     *
+     * "Conversion from numeric representation to text representation shall use
+     * the lower case forms of the hexadecimal digits in the range a..f,
+     * suppressing leading zeros".
+     */
+    p = g_strdup(path);
+    for (at = strchr(p, '@'); at && *at; ) {
+        if (*at == '/') {
+            /* End of this unit address, skip to the next one if any */
+            at = strchr(at, '@');
+        } else {
+            /*
+             * The path comes from the guest: a plain char may be negative,
+             * which tolower() does not accept, hence the cast.
+             */
+            *at = tolower((unsigned char)*at);
+            ++at;
+        }
+    }
+
+    return fdt_path_offset(fdt, p);
+}
+
+/*
+ * "finddevice": reads a path string from guest address @nodeaddr and
+ * returns the phandle of that node, or -1 if the string cannot be read or
+ * the path does not resolve.
+ */
+static uint32_t vof_finddevice(const void *fdt, uint32_t nodeaddr)
+{
+    char path[VOF_MAX_PATH];
+    uint32_t ph = -1;
+    int node;
+
+    if (readstr(nodeaddr, path, sizeof(path))) {
+        return ph;
+    }
+
+    node = path_offset(fdt, path);
+    if (node >= 0) {
+        ph = fdt_get_phandle(fdt, node);
+    }
+    trace_vof_finddevice(path, ph);
+
+    return ph;
+}
+
+/*
+ * Looks up property @propname of the node at @nodeoff.
+ *
+ * Returns a pointer to the value inside @fdt and its length in @proplen, or
+ * NULL if there is no such property. "name" is special: when the node has no
+ * such property the value is derived from the node name with the unit
+ * address cut off, and @proplen counts one extra byte for the terminator.
+ *
+ * @write0, when not NULL, is set to true if the returned bytes are not
+ * NUL-terminated within @proplen so the caller has to store the terminator
+ * itself, and to false for a regular property. It is left untouched when
+ * NULL is returned.
+ */
+static const void *getprop(const void *fdt, int nodeoff, const char *propname,
+ int *proplen, bool *write0)
+{
+ const char *unit, *prop;
+ const void *ret = fdt_getprop(fdt, nodeoff, propname, proplen);
+
+ if (ret) {
+ if (write0) {
+ *write0 = false;
+ }
+ return ret;
+ }
+
+ if (strcmp(propname, "name")) {
+ return NULL;
+ }
+ /*
+ * We return a value for "name" from path if queried but property does not
+ * exist. @proplen does not include the unit part in this case.
+ */
+ prop = fdt_get_name(fdt, nodeoff, proplen);
+ if (!prop) {
+ *proplen = 0;
+ return NULL;
+ }
+
+ unit = memchr(prop, '@', *proplen);
+ if (unit) {
+ *proplen = unit - prop;
+ }
+ /* Account for the terminating zero */
+ *proplen += 1;
+
+ /*
+ * Since it might be cut at "@" and there will be no trailing zero
+ * in the prop buffer, tell the caller to write zero at the end.
+ */
+ if (write0) {
+ *write0 = true;
+ }
+ return prop;
+}
+
+/*
+ * "getprop": copies the value of the property whose name is at guest
+ * address @pname, of the node with phandle @nodeph, to guest address
+ * @valaddr. At most @vallen bytes are copied.
+ *
+ * Returns the full property length (which may exceed what was copied), or
+ * -1 if the node or property does not exist, the name cannot be read or the
+ * guest buffer cannot be written.
+ */
+static uint32_t vof_getprop(const void *fdt, uint32_t nodeph, uint32_t pname,
+ uint32_t valaddr, uint32_t vallen)
+{
+ char propname[OF_PROPNAME_LEN_MAX + 1];
+ uint32_t ret = 0;
+ int proplen = 0;
+ const void *prop;
+ char trval[64] = "";
+ int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph);
+ bool write0;
+
+ if (nodeoff < 0) {
+ return -1;
+ }
+ if (readstr(pname, propname, sizeof(propname))) {
+ return -1;
+ }
+ prop = getprop(fdt, nodeoff, propname, &proplen, &write0);
+ if (prop) {
+ const char zero = 0;
+ int cb = MIN(proplen, vallen);
+
+ if (VOF_MEM_WRITE(valaddr, prop, cb) != MEMTX_OK ||
+ /* if that was "name" with a unit address, overwrite '@' with '\0' */
+ (write0 &&
+ cb == proplen &&
+ VOF_MEM_WRITE(valaddr + cb - 1, &zero, 1) != MEMTX_OK)) {
+ ret = -1;
+ } else {
+ /*
+ * OF1275 says:
+ * "Size is either the actual size of the property, or -1 if name
+ * does not exist", hence returning proplen instead of cb.
+ */
+ ret = proplen;
+ /* Do not format a value if tracepoint is silent, for performance */
+ if (trace_event_get_state(TRACE_VOF_GETPROP) &&
+ qemu_loglevel_mask(LOG_TRACE)) {
+ prop_format(trval, sizeof(trval), prop, ret);
+ }
+ }
+ } else {
+ ret = -1;
+ }
+ trace_vof_getprop(nodeph, propname, ret, trval);
+
+ return ret;
+}
+
+/*
+ * "getproplen": returns the length of the property whose name is at guest
+ * address @pname, of the node with phandle @nodeph, or -1 if the node or the
+ * property does not exist or the name cannot be read.
+ */
+static uint32_t vof_getproplen(const void *fdt, uint32_t nodeph, uint32_t pname)
+{
+    char name[OF_PROPNAME_LEN_MAX + 1];
+    int len = 0;
+    int node = fdt_node_offset_by_phandle(fdt, nodeph);
+    uint32_t ret;
+
+    if (node < 0 || readstr(pname, name, sizeof(name))) {
+        return -1;
+    }
+
+    ret = getprop(fdt, node, name, &len, NULL) ? len : -1;
+    trace_vof_getproplen(nodeph, name, ret);
+
+    return ret;
+}
+
+/*
+ * "setprop": stores @vallen bytes from guest address @valaddr as the value
+ * of the property whose name is at guest address @pname, in the node with
+ * phandle @nodeph. The machine's VofMachineIfClass::setprop hook has to
+ * approve the change first.
+ *
+ * Returns @vallen on success and -1 on any failure: value too long,
+ * unreadable guest memory, unknown node, the machine not implementing or
+ * refusing the hook, or libfdt failing to store the property.
+ */
+static uint32_t vof_setprop(MachineState *ms, void *fdt, Vof *vof,
+                            uint32_t nodeph, uint32_t pname,
+                            uint32_t valaddr, uint32_t vallen)
+{
+    /* Initialized as it is traced even when we bail out before reading it */
+    char propname[OF_PROPNAME_LEN_MAX + 1] = "";
+    uint32_t ret = -1;
+    /*
+     * libfdt results are signed; keep them apart from the unsigned @ret so
+     * that negative errors are detected and never leak out as a "length".
+     */
+    int offset, rc;
+    char trval[64] = "";
+    char nodepath[VOF_MAX_PATH] = "";
+    Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF);
+    VofMachineIfClass *vmc;
+    g_autofree char *val = NULL;
+
+    if (vallen > VOF_MAX_SETPROPLEN) {
+        goto trace_exit;
+    }
+    if (readstr(pname, propname, sizeof(propname))) {
+        goto trace_exit;
+    }
+    offset = fdt_node_offset_by_phandle(fdt, nodeph);
+    if (offset < 0) {
+        goto trace_exit;
+    }
+    rc = get_path(fdt, offset, nodepath, sizeof(nodepath));
+    if (rc <= 0) {
+        goto trace_exit;
+    }
+
+    val = g_malloc0(vallen);
+    if (VOF_MEM_READ(valaddr, val, vallen) != MEMTX_OK) {
+        goto trace_exit;
+    }
+
+    if (!vmo) {
+        goto trace_exit;
+    }
+
+    vmc = VOF_MACHINE_GET_CLASS(vmo);
+    if (!vmc->setprop || !vmc->setprop(ms, nodepath, propname, val, vallen)) {
+        goto trace_exit;
+    }
+
+    rc = fdt_setprop(fdt, offset, propname, val, vallen);
+    if (rc) {
+        goto trace_exit;
+    }
+
+    /* Do not format a value if tracepoint is silent, for performance */
+    if (trace_event_get_state(TRACE_VOF_SETPROP) &&
+        qemu_loglevel_mask(LOG_TRACE)) {
+        prop_format(trval, sizeof(trval), val, vallen);
+    }
+    ret = vallen;
+
+trace_exit:
+    trace_vof_setprop(nodeph, propname, trval, vallen, ret);
+
+    return ret;
+}
+
+/*
+ * "nextprop": writes to guest address @nameaddr the name of the property
+ * which follows the one named by the string at guest address @prevaddr in
+ * the node with phandle @phandle. An empty previous name selects the first
+ * property.
+ *
+ * Returns 1 if a name was written, 0 if there is no such property (or a
+ * property could not be fetched), -1 if guest memory could not be accessed.
+ */
+static uint32_t vof_nextprop(const void *fdt, uint32_t phandle,
+ uint32_t prevaddr, uint32_t nameaddr)
+{
+ int offset, nodeoff = fdt_node_offset_by_phandle(fdt, phandle);
+ char prev[OF_PROPNAME_LEN_MAX + 1];
+ const char *tmp;
+
+ if (readstr(prevaddr, prev, sizeof(prev))) {
+ return -1;
+ }
+
+ fdt_for_each_property_offset(offset, fdt, nodeoff) {
+ if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) {
+ return 0;
+ }
+ if (prev[0] == '\0' || strcmp(prev, tmp) == 0) {
+ /* Found @prev, step to the property after it */
+ if (prev[0] != '\0') {
+ offset = fdt_next_property_offset(fdt, offset);
+ if (offset < 0) {
+ return 0;
+ }
+ }
+ if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) {
+ return 0;
+ }
+
+ /* The name is copied together with its terminating zero */
+ if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) {
+ return -1;
+ }
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * "peer": returns the phandle of the next sibling of the node with phandle
+ * @phandle, or of the root node when @phandle is 0. Returns 0 when there is
+ * no such node.
+ */
+static uint32_t vof_peer(const void *fdt, uint32_t phandle)
+{
+    int node;
+
+    if (phandle != 0) {
+        node = fdt_next_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle));
+    } else {
+        node = fdt_path_offset(fdt, "/");
+    }
+
+    return node < 0 ? 0 : fdt_get_phandle(fdt, node);
+}
+
+/*
+ * "child": returns the phandle of the first subnode of the node with phandle
+ * @phandle, or 0 when there is none.
+ */
+static uint32_t vof_child(const void *fdt, uint32_t phandle)
+{
+    int parent = fdt_node_offset_by_phandle(fdt, phandle);
+    int node = fdt_first_subnode(fdt, parent);
+
+    return node < 0 ? 0 : fdt_get_phandle(fdt, node);
+}
+
+/*
+ * "parent": returns the phandle of the parent of the node with phandle
+ * @phandle, or 0 when there is none.
+ */
+static uint32_t vof_parent(const void *fdt, uint32_t phandle)
+{
+    int child = fdt_node_offset_by_phandle(fdt, phandle);
+    int node = fdt_parent_offset(fdt, child);
+
+    return node < 0 ? 0 : fdt_get_phandle(fdt, node);
+}
+
+/*
+ * Creates an instance for the node at @offset, remembering @path as the path
+ * it was opened with, and registers it under a newly allocated ihandle.
+ * Returns the ihandle, or -1 once the ihandle space is exhausted.
+ */
+static uint32_t vof_do_open(void *fdt, Vof *vof, int offset, const char *path)
+{
+    OfInstance *inst;
+    uint32_t ihandle;
+
+    if (vof->of_instance_last == 0xFFFFFFFF) {
+        /* We do not recycle ihandles yet */
+        ihandle = -1;
+        trace_vof_open(path, 0, ihandle);
+        return ihandle;
+    }
+
+    inst = g_new0(OfInstance, 1);
+    inst->phandle = fdt_get_phandle(fdt, offset);
+    g_assert(inst->phandle);
+    inst->path = g_strdup(path);
+
+    ihandle = ++vof->of_instance_last;
+    g_hash_table_insert(vof->of_instances, GINT_TO_POINTER(ihandle), inst);
+
+    trace_vof_open(path, inst->phandle, ihandle);
+
+    return ihandle;
+}
+
+/*
+ * Opens an instance of the node at @path and stores the resulting ihandle
+ * as a cell in property @prop of node @nodename.
+ * Returns the fdt_setprop_cell() result, or the negative libfdt error if
+ * @path does not resolve.
+ */
+uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename,
+                               const char *prop, const char *path)
+{
+    int target = fdt_path_offset(fdt, nodename);
+    int opened = fdt_path_offset(fdt, path);
+    int ihandle;
+
+    if (opened < 0) {
+        trace_vof_error_unknown_path(path);
+        return opened;
+    }
+
+    ihandle = vof_do_open(fdt, vof, opened, path);
+
+    return fdt_setprop_cell(fdt, target, prop, ihandle);
+}
+
+/*
+ * "open": reads a path from guest address @pathaddr and opens an instance
+ * of that node. Returns the new ihandle, or -1 if the path cannot be read,
+ * does not resolve, or no ihandle is available.
+ */
+static uint32_t vof_open(void *fdt, Vof *vof, uint32_t pathaddr)
+{
+    char path[VOF_MAX_PATH];
+    int offset;
+
+    if (readstr(pathaddr, path, sizeof(path))) {
+        return -1;
+    }
+
+    offset = path_offset(fdt, path);
+    if (offset < 0) {
+        trace_vof_error_unknown_path(path);
+        /*
+         * Do not hand the raw libfdt error to the client: converted to
+         * uint32_t most of them would look like a valid ihandle.
+         */
+        return -1;
+    }
+
+    return vof_do_open(fdt, vof, offset, path);
+}
+
+/* "close": drops the instance registered under @ihandle, tracing unknown ones */
+static void vof_close(Vof *vof, uint32_t ihandle)
+{
+    gboolean removed = g_hash_table_remove(vof->of_instances,
+                                           GINT_TO_POINTER(ihandle));
+
+    if (!removed) {
+        trace_vof_error_unknown_ihandle_close(ihandle);
+    }
+}
+
+/*
+ * "instance-to-package": returns the phandle recorded for the instance
+ * @ihandle, or -1 if there is no such instance.
+ */
+static uint32_t vof_instance_to_package(Vof *vof, uint32_t ihandle)
+{
+    OfInstance *inst = g_hash_table_lookup(vof->of_instances,
+                                           GINT_TO_POINTER(ihandle));
+    uint32_t ph = inst ? inst->phandle : (uint32_t)-1;
+
+    trace_vof_instance_to_package(ihandle, ph);
+
+    return ph;
+}
+
+/*
+ * "package-to-path": writes the full path of the node with phandle @phandle
+ * to guest address @buf. Returns the path length including the terminating
+ * zero, or -1 if the phandle is unknown or the guest buffer cannot be
+ * written.
+ *
+ * NOTE(review): @len, the size of the client buffer, is not used to limit
+ * the write.
+ */
+static uint32_t vof_package_to_path(const void *fdt, uint32_t phandle,
+                                    uint32_t buf, uint32_t len)
+{
+    /*
+     * Signed on purpose: a negative libfdt error kept in an unsigned
+     * variable would pass the "> 0" test and be used as a huge copy length.
+     */
+    int rc;
+    char tmp[VOF_MAX_PATH] = "";
+
+    rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp));
+    if (rc > 0) {
+        if (VOF_MEM_WRITE(buf, tmp, rc) != MEMTX_OK) {
+            rc = -1;
+        }
+    }
+
+    trace_vof_package_to_path(phandle, tmp, rc);
+
+    return rc > 0 ? (uint32_t)rc : (uint32_t)-1;
+}
+
+/*
+ * "instance-to-path": writes the full path of the node behind instance
+ * @ihandle to guest address @buf. Returns the path length including the
+ * terminating zero, or -1 if the instance or its node is unknown or the
+ * guest buffer cannot be written.
+ *
+ * NOTE(review): @len, the size of the client buffer, is not used to limit
+ * the write.
+ */
+static uint32_t vof_instance_to_path(void *fdt, Vof *vof, uint32_t ihandle,
+                                     uint32_t buf, uint32_t len)
+{
+    /*
+     * Signed on purpose: a negative libfdt error kept in an unsigned
+     * variable would pass the "> 0" test and be used as a huge copy length.
+     */
+    int rc = -1;
+    uint32_t phandle = vof_instance_to_package(vof, ihandle);
+    char tmp[VOF_MAX_PATH] = "";
+
+    if (phandle != -1) {
+        rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp));
+        if (rc > 0) {
+            if (VOF_MEM_WRITE(buf, tmp, rc) != MEMTX_OK) {
+                rc = -1;
+            }
+        }
+    }
+    trace_vof_instance_to_path(ihandle, phandle, tmp, rc);
+
+    return rc > 0 ? (uint32_t)rc : (uint32_t)-1;
+}
+
+/*
+ * "write": consumes @len bytes at guest address @buf written to instance
+ * @ihandle. There is no backend yet, the data only goes to the trace log.
+ * Returns the number of bytes consumed, or -1 if the instance is unknown or
+ * guest memory cannot be read.
+ */
+static uint32_t vof_write(Vof *vof, uint32_t ihandle, uint32_t buf,
+                          uint32_t len)
+{
+    char tmp[VOF_VTY_BUF_SIZE];
+    unsigned cb;
+    uint32_t done = 0;
+    OfInstance *inst = (OfInstance *)
+        g_hash_table_lookup(vof->of_instances, GINT_TO_POINTER(ihandle));
+
+    if (!inst) {
+        trace_vof_error_write(ihandle);
+        return -1;
+    }
+
+    /* Go in chunks which fit @tmp together with a terminating zero */
+    while (done < len) {
+        cb = MIN(len - done, sizeof(tmp) - 1);
+        /* Read at the current position, not the start of the buffer */
+        if (VOF_MEM_READ(buf + done, tmp, cb) != MEMTX_OK) {
+            return -1;
+        }
+
+        /* FIXME: there is no backend(s) yet so just call a trace */
+        if (trace_event_get_state(TRACE_VOF_WRITE) &&
+            qemu_loglevel_mask(LOG_TRACE)) {
+            tmp[cb] = '\0';
+            trace_vof_write(ihandle, cb, tmp);
+        }
+        done += cb;
+    }
+
+    /* OF1275 "write" returns the number of bytes actually written */
+    return done;
+}
+
+/* Traces every claimed range; does nothing while the tracepoint is silent */
+static void vof_claimed_dump(GArray *claimed)
+{
+    guint idx;
+
+    if (!trace_event_get_state(TRACE_VOF_CLAIMED) ||
+        !qemu_loglevel_mask(LOG_TRACE)) {
+        return;
+    }
+
+    for (idx = 0; idx < claimed->len; ++idx) {
+        const OfClaimed *c = &g_array_index(claimed, OfClaimed, idx);
+
+        trace_vof_claimed(c->start, c->start + c->size, c->size);
+    }
+}
+
+/* Tells whether [@virt, @virt + @size) overlaps none of the claimed ranges */
+static bool vof_claim_avail(GArray *claimed, uint64_t virt, uint64_t size)
+{
+    guint idx;
+
+    for (idx = 0; idx < claimed->len; ++idx) {
+        const OfClaimed *c = &g_array_index(claimed, OfClaimed, idx);
+
+        if (ranges_overlap(c->start, c->size, virt, size)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Appends the range [@virt, @virt + @size) to the array of claimed ranges */
+static void vof_claim_add(GArray *claimed, uint64_t virt, uint64_t size)
+{
+    OfClaimed entry = {
+        .start = virt,
+        .size = size,
+    };
+
+    g_array_append_val(claimed, entry);
+}
+
+/*
+ * GArray sort callback ordering claimed ranges by ascending start address.
+ *
+ * The starts are compared rather than subtracted: the difference of two
+ * uint64_t values truncated to gint may come out with the wrong sign, or as
+ * zero for distinct values.
+ */
+static gint of_claimed_compare_func(gconstpointer a, gconstpointer b)
+{
+    uint64_t sa = ((const OfClaimed *)a)->start;
+    uint64_t sb = ((const OfClaimed *)b)->start;
+
+    return (sa > sb) - (sa < sb);
+}
+
+/*
+ * Rebuilds the "available" property of /memory@0 from the claimed ranges.
+ *
+ * The claimed array is sorted by start address; one (address, size) entry is
+ * then produced for the gap following each claimed range, the last gap
+ * extending to the end of memory as described by the "reg" property of
+ * /memory@0. Cell widths follow #address-cells and #size-cells of the root
+ * node.
+ *
+ * Does nothing if @fdt or @claimed is NULL.
+ *
+ * NOTE(review): @base is not used, and @n only counts the non-empty gaps
+ * without affecting what is stored, so empty gaps end up in "available" too.
+ */
+static void vof_dt_memory_available(void *fdt, GArray *claimed, uint64_t base)
+{
+ int i, n, offset, proplen = 0, sc, ac;
+ target_ulong mem0_end;
+ const uint8_t *mem0_reg;
+ g_autofree uint8_t *avail = NULL;
+ uint8_t *availcur;
+
+ if (!fdt || !claimed) {
+ return;
+ }
+
+ offset = fdt_path_offset(fdt, "/");
+ _FDT(offset);
+ ac = fdt_address_cells(fdt, offset);
+ g_assert(ac == 1 || ac == 2);
+ sc = fdt_size_cells(fdt, offset);
+ g_assert(sc == 1 || sc == 2);
+
+ offset = fdt_path_offset(fdt, "/memory@0");
+ _FDT(offset);
+
+ mem0_reg = fdt_getprop(fdt, offset, "reg", &proplen);
+ g_assert(mem0_reg && proplen == sizeof(uint32_t) * (ac + sc));
+ /* The size part of "reg" follows the address cells */
+ if (sc == 2) {
+ mem0_end = be64_to_cpu(*(uint64_t *)(mem0_reg + sizeof(uint32_t) * ac));
+ } else {
+ mem0_end = be32_to_cpu(*(uint32_t *)(mem0_reg + sizeof(uint32_t) * ac));
+ }
+
+ g_array_sort(claimed, of_claimed_compare_func);
+ vof_claimed_dump(claimed);
+
+ /*
+ * VOF resides in the first page so we do not need to check if there is
+ * available memory before the first claimed block
+ */
+ g_assert(claimed->len && (g_array_index(claimed, OfClaimed, 0).start == 0));
+
+ /* One (address, size) pair per claimed range */
+ avail = g_malloc0(sizeof(uint32_t) * (ac + sc) * claimed->len);
+ for (i = 0, n = 0, availcur = avail; i < claimed->len; ++i) {
+ OfClaimed c = g_array_index(claimed, OfClaimed, i);
+ uint64_t start, size;
+
+ /* The gap starts right after this range and ends at the next one */
+ start = c.start + c.size;
+ if (i < claimed->len - 1) {
+ OfClaimed cn = g_array_index(claimed, OfClaimed, i + 1);
+
+ size = cn.start - start;
+ } else {
+ size = mem0_end - start;
+ }
+
+ if (ac == 2) {
+ *(uint64_t *) availcur = cpu_to_be64(start);
+ } else {
+ *(uint32_t *) availcur = cpu_to_be32(start);
+ }
+ availcur += sizeof(uint32_t) * ac;
+ if (sc == 2) {
+ *(uint64_t *) availcur = cpu_to_be64(size);
+ } else {
+ *(uint32_t *) availcur = cpu_to_be32(size);
+ }
+ availcur += sizeof(uint32_t) * sc;
+
+ if (size) {
+ trace_vof_avail(c.start + c.size, c.start + c.size + size, size);
+ ++n;
+ }
+ }
+ _FDT((fdt_setprop(fdt, offset, "available", avail, availcur - avail)));
+}
+
+/*
+ * OF1275:
+ * "Allocates size bytes of memory. If align is zero, the allocated range
+ * begins at the virtual address virt. Otherwise, an aligned address is
+ * automatically chosen and the input argument virt is ignored".
+ *
+ * In other words, exactly one of @virt and @align is non-zero.
+ *
+ * On success the range is recorded in vof->claimed, vof->claimed_base is
+ * moved past it if needed, and the start address is returned.  Returns -1
+ * (all ones) if @size is 0, if the fixed range overlaps an existing claim,
+ * or if the search for an aligned range reaches vof->top_addr.
+ */
+uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size,
+                   uint64_t align)
+{
+    uint64_t ret;
+
+    if (size == 0) {
+        ret = -1;
+    } else if (align == 0) {
+        if (!vof_claim_avail(vof->claimed, virt, size)) {
+            ret = -1;
+        } else {
+            ret = virt;
+        }
+    } else {
+        vof->claimed_base = QEMU_ALIGN_UP(vof->claimed_base, align);
+        while (1) {
+            if (vof->claimed_base >= vof->top_addr) {
+                error_report("Out of RMA memory for the OF client");
+                return -1;
+            }
+            if (vof_claim_avail(vof->claimed, vof->claimed_base, size)) {
+                break;
+            }
+            /*
+             * Skip the busy candidate.  Re-align afterwards: @size need not
+             * be a multiple of @align, and the returned address must honour
+             * the requested alignment.
+             */
+            vof->claimed_base = QEMU_ALIGN_UP(vof->claimed_base + size, align);
+        }
+        ret = vof->claimed_base;
+    }
+
+    if (ret != -1) {
+        vof->claimed_base = MAX(vof->claimed_base, ret + size);
+        vof_claim_add(vof->claimed, ret, size);
+    }
+    trace_vof_claim(virt, size, align, ret);
+
+    return ret;
+}
+
+/*
+ * Drop the claimed range that matches (@virt, @size) exactly.
+ *
+ * Returns 0 if such an entry was found and removed, -1 (as uint32_t)
+ * otherwise.  The outcome is traced either way.
+ */
+static uint32_t vof_release(Vof *vof, uint64_t virt, uint64_t size)
+{
+    GArray *ranges = vof->claimed;
+    uint32_t ret = -1;
+    guint idx;
+
+    for (idx = 0; idx < ranges->len; idx++) {
+        const OfClaimed *cur = &g_array_index(ranges, OfClaimed, idx);
+
+        if (cur->start != virt || cur->size != size) {
+            continue;
+        }
+        g_array_remove_index(ranges, idx);
+        ret = 0;
+        break;
+    }
+
+    trace_vof_release(virt, size, ret);
+
+    return ret;
+}
+
+/*
+ * Unconditionally reports an error through @errp: per the message, RTAS is
+ * expected to have been instantiated by the firmware already.
+ */
+static void vof_instantiate_rtas(Error **errp)
+{
+ error_setg(errp, "The firmware should have instantiated RTAS");
+}
+
+/*
+ * Handle a "call-method" request.
+ *
+ * The instance for @ihandle is looked up in vof->of_instances and the method
+ * name is read from guest memory at @methodaddr.  Dispatch is by instance
+ * path:
+ *  - "/" with "ibm,client-architecture-support": forwarded to the machine's
+ *    client_architecture_support() hook when the machine implements
+ *    TYPE_VOF_MACHINE_IF; *@ret2 is set to 0;
+ *  - "/rtas" with "instantiate-rtas": calls vof_instantiate_rtas() with
+ *    &error_fatal, then sets the result to 0 and *@ret2 to @param1;
+ *  - any other instance path: the method name is traced as unknown.
+ *
+ * Returns -1 (as uint32_t) unless one of the handlers above set a result.
+ * @ret2 must be a valid pointer, it is read for tracing on every path.
+ * @param2, @param3 and @param4 are not used by this function.
+ */
+static uint32_t vof_call_method(MachineState *ms, Vof *vof, uint32_t methodaddr,
+ uint32_t ihandle, uint32_t param1,
+ uint32_t param2, uint32_t param3,
+ uint32_t param4, uint32_t *ret2)
+{
+ uint32_t ret = -1;
+ char method[VOF_MAX_METHODLEN] = "";
+ OfInstance *inst;
+
+ if (!ihandle) {
+ goto trace_exit;
+ }
+
+ inst = (OfInstance *)g_hash_table_lookup(vof->of_instances,
+ GINT_TO_POINTER(ihandle));
+ if (!inst) {
+ goto trace_exit;
+ }
+
+ /* A non-zero return from readstr() is treated as failure. */
+ if (readstr(methodaddr, method, sizeof(method))) {
+ goto trace_exit;
+ }
+
+ if (strcmp(inst->path, "/") == 0) {
+ if (strcmp(method, "ibm,client-architecture-support") == 0) {
+ Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF);
+
+ if (vmo) {
+ VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo);
+
+ g_assert(vmc->client_architecture_support);
+ ret = vmc->client_architecture_support(ms, first_cpu, param1);
+ }
+
+ *ret2 = 0;
+ }
+ } else if (strcmp(inst->path, "/rtas") == 0) {
+ if (strcmp(method, "instantiate-rtas") == 0) {
+ vof_instantiate_rtas(&error_fatal);
+ ret = 0;
+ *ret2 = param1; /* rtas-base */
+ }
+ } else {
+ trace_vof_error_unknown_method(method);
+ }
+
+trace_exit:
+ trace_vof_method(ihandle, method, param1, ret, *ret2);
+
+ return ret;
+}
+
+/*
+ * Handle an "interpret" request.  Nothing is interpreted: the command string
+ * is read from guest memory at @cmdaddr only so that it can be traced.
+ *
+ * Always returns -1 (as uint32_t).  @ret2 is read for tracing and never
+ * written; it must be a valid pointer.
+ */
+static uint32_t vof_call_interpret(uint32_t cmdaddr, uint32_t param1,
+ uint32_t param2, uint32_t *ret2)
+{
+ uint32_t ret = -1;
+ char cmd[VOF_MAX_FORTHCODE] = "";
+
+ /* No interpret implemented so just call a trace */
+ /* The readstr() result is ignored; cmd stays "" unless readstr() fills it. */
+ readstr(cmdaddr, cmd, sizeof(cmd));
+ trace_vof_interpret(cmd, param1, param2, ret, *ret2);
+
+ return ret;
+}
+
+/*
+ * Handle the "quiesce" service: pack the FDT, give the machine a chance to
+ * react through its optional quiesce() hook and dump the claimed ranges.
+ */
+static void vof_quiesce(MachineState *ms, void *fdt, Vof *vof)
+{
+    Object *machine_if;
+    int pack_rc;
+
+    /* The FDT is not expected to change after "quiesce", so pack it now. */
+    pack_rc = fdt_pack(fdt);
+    assert(pack_rc == 0);
+
+    machine_if = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF);
+    if (machine_if) {
+        VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(machine_if);
+
+        if (vmc->quiesce) {
+            vmc->quiesce(ms);
+        }
+    }
+
+    vof_claimed_dump(vof->claimed);
+}
+
+/*
+ * Dispatch one client interface call to its handler.
+ *
+ * @service, @nargs and @nrets are matched against the table below through
+ * cmpservice(); the selected handler is called with cells taken from @args.
+ * "call-method" and "interpret" additionally receive @rets so they can
+ * provide a second return value.
+ *
+ * Returns the first return cell of the call.  It stays 0 for handlers that
+ * return nothing; unknown services are traced and yield -1.
+ */
+static uint32_t vof_client_handle(MachineState *ms, void *fdt, Vof *vof,
+ const char *service,
+ uint32_t *args, unsigned nargs,
+ uint32_t *rets, unsigned nrets)
+{
+ uint32_t ret = 0;
+
+ /* @nrets includes the value which this function returns */
+#define cmpserv(s, a, r) \
+ cmpservice(service, nargs, nrets, (s), (a), (r))
+
+ if (cmpserv("finddevice", 1, 1)) {
+ ret = vof_finddevice(fdt, args[0]);
+ } else if (cmpserv("getprop", 4, 1)) {
+ ret = vof_getprop(fdt, args[0], args[1], args[2], args[3]);
+ } else if (cmpserv("getproplen", 2, 1)) {
+ ret = vof_getproplen(fdt, args[0], args[1]);
+ } else if (cmpserv("setprop", 4, 1)) {
+ ret = vof_setprop(ms, fdt, vof, args[0], args[1], args[2], args[3]);
+ } else if (cmpserv("nextprop", 3, 1)) {
+ ret = vof_nextprop(fdt, args[0], args[1], args[2]);
+ } else if (cmpserv("peer", 1, 1)) {
+ ret = vof_peer(fdt, args[0]);
+ } else if (cmpserv("child", 1, 1)) {
+ ret = vof_child(fdt, args[0]);
+ } else if (cmpserv("parent", 1, 1)) {
+ ret = vof_parent(fdt, args[0]);
+ } else if (cmpserv("open", 1, 1)) {
+ ret = vof_open(fdt, vof, args[0]);
+ } else if (cmpserv("close", 1, 0)) {
+ vof_close(vof, args[0]);
+ } else if (cmpserv("instance-to-package", 1, 1)) {
+ ret = vof_instance_to_package(vof, args[0]);
+ } else if (cmpserv("package-to-path", 3, 1)) {
+ ret = vof_package_to_path(fdt, args[0], args[1], args[2]);
+ } else if (cmpserv("instance-to-path", 3, 1)) {
+ ret = vof_instance_to_path(fdt, vof, args[0], args[1], args[2]);
+ } else if (cmpserv("write", 3, 1)) {
+ ret = vof_write(vof, args[0], args[1], args[2]);
+ } else if (cmpserv("claim", 3, 1)) {
+ ret = vof_claim(vof, args[0], args[1], args[2]);
+ /* Refresh the "available" property only if the claimed set changed. */
+ if (ret != -1) {
+ vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base);
+ }
+ } else if (cmpserv("release", 2, 0)) {
+ ret = vof_release(vof, args[0], args[1]);
+ if (ret != -1) {
+ vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base);
+ }
+ } else if (cmpserv("call-method", 0, 0)) {
+ /* NOTE(review): reads args[0..5] whatever @nargs is - confirm cmpservice() semantics for 0. */
+ ret = vof_call_method(ms, vof, args[0], args[1], args[2], args[3],
+ args[4], args[5], rets);
+ } else if (cmpserv("interpret", 0, 0)) {
+ ret = vof_call_interpret(args[0], args[1], args[2], rets);
+ } else if (cmpserv("milliseconds", 0, 1)) {
+ ret = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+ } else if (cmpserv("quiesce", 0, 0)) {
+ vof_quiesce(ms, fdt, vof);
+ } else if (cmpserv("exit", 0, 0)) {
+ error_report("Stopped as the VM requested \"exit\"");
+ vm_stop(RUN_STATE_PAUSED);
+ } else {
+ trace_vof_error_unknown_service(service, nargs, nrets);
+ ret = -1;
+ }
+
+#undef cmpserv
+
+ return ret;
+}
+
+/*
+ * Argument block of a client interface call as laid out in guest memory.
+ * Defined as Big Endian
+ */
+struct prom_args {
+ uint32_t service; /* guest address of the service name string */
+ uint32_t nargs; /* number of input cells at the start of args[] */
+ uint32_t nret; /* number of return cells following the inputs */
+ uint32_t args[10]; /* input cells, then return cells */
+} QEMU_PACKED;
+
+/*
+ * Entry point for a client interface call made by the guest.
+ *
+ * Reads the big-endian struct prom_args at guest address @args_real,
+ * validates it, converts the input cells to host endianness, runs the
+ * service through vof_client_handle() and writes the return cells back
+ * right after the input cells.
+ *
+ * nargs and nret come from the guest: together they must fit into the
+ * args[] array, otherwise storing the return cells would run past the end
+ * of the local arrays.
+ *
+ * Returns 0 on success, -EINVAL if guest memory cannot be accessed, the
+ * service name is not NUL-terminated within 64 bytes, or nargs/nret are out
+ * of range.
+ */
+int vof_client_call(MachineState *ms, Vof *vof, void *fdt,
+                    target_ulong args_real)
+{
+    struct prom_args args_be;
+    /* Zeroed so handlers never see stale stack data beyond nargs. */
+    uint32_t args[ARRAY_SIZE(args_be.args)] = { 0 };
+    uint32_t rets[ARRAY_SIZE(args_be.args)] = { 0 }, ret;
+    char service[64];
+    unsigned nargs, nret, i;
+
+    if (VOF_MEM_READ(args_real, &args_be, sizeof(args_be)) != MEMTX_OK) {
+        return -EINVAL;
+    }
+    nargs = be32_to_cpu(args_be.nargs);
+    if (nargs >= ARRAY_SIZE(args_be.args)) {
+        return -EINVAL;
+    }
+
+    if (VOF_MEM_READ(be32_to_cpu(args_be.service), service, sizeof(service)) !=
+        MEMTX_OK) {
+        return -EINVAL;
+    }
+    if (strnlen(service, sizeof(service)) == sizeof(service)) {
+        /* Too long service name */
+        return -EINVAL;
+    }
+
+    for (i = 0; i < nargs; ++i) {
+        args[i] = be32_to_cpu(args_be.args[i]);
+    }
+
+    nret = be32_to_cpu(args_be.nret);
+    if (nret > ARRAY_SIZE(args_be.args) - nargs) {
+        /* The return cells would not fit behind the input cells. */
+        return -EINVAL;
+    }
+    ret = vof_client_handle(ms, fdt, vof, service, args, nargs, rets, nret);
+    if (!nret) {
+        return 0;
+    }
+
+    /* First return cell is the handler's result, the rest come from rets[]. */
+    args_be.args[nargs] = cpu_to_be32(ret);
+    for (i = 1; i < nret; ++i) {
+        args_be.args[nargs + i] = cpu_to_be32(rets[i - 1]);
+    }
+
+    if (VOF_MEM_WRITE(args_real + offsetof(struct prom_args, args[nargs]),
+                      args_be.args + nargs, sizeof(args_be.args[0]) * nret) !=
+        MEMTX_OK) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/* Value destructor for vof->of_instances: frees the path, then the instance. */
+static void vof_instance_free(gpointer data)
+{
+    OfInstance *instance = data;
+
+    g_free(instance->path);
+    g_free(instance);
+}
+
+/*
+ * (Re)initialise @vof.
+ *
+ * Any previous state is dropped with vof_cleanup(), then the instance hash
+ * table (values freed with vof_instance_free()) and the array of claimed
+ * ranges are created.  The allocation limit is @top_addr capped at 4GiB and
+ * the range (0, vof->fw_size) is claimed for the firmware; @errp is set if
+ * that claim fails.
+ */
+void vof_init(Vof *vof, uint64_t top_addr, Error **errp)
+{
+ vof_cleanup(vof);
+
+ vof->of_instances = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+ NULL, vof_instance_free);
+ vof->claimed = g_array_new(false, false, sizeof(OfClaimed));
+
+ /* Keep allocations in 32bit as CLI ABI can only return cells==32bit */
+ vof->top_addr = MIN(top_addr, 4 * GiB);
+ if (vof_claim(vof, 0, vof->fw_size, 0) == -1) {
+ error_setg(errp, "Memory for firmware is in use");
+ }
+}
+
+/*
+ * Drop the references held on the claimed-ranges array and the instance
+ * table (if any) and reset both pointers to NULL.
+ */
+void vof_cleanup(Vof *vof)
+{
+    g_clear_pointer(&vof->claimed, g_array_unref);
+    g_clear_pointer(&vof->of_instances, g_hash_table_unref);
+}
+
+/*
+ * Finish the device tree for the client: give every node that has no
+ * "phandle" property a fresh one, numbered upwards from the current maximum,
+ * then refresh the "available" memory property.
+ */
+void vof_build_dt(void *fdt, Vof *vof)
+{
+    uint32_t next_phandle = fdt_get_max_phandle(fdt);
+    int node, len = 0;
+
+    /* Assign phandles to nodes without predefined phandles (like XICS/XIVE) */
+    node = fdt_next_node(fdt, -1, NULL);
+    while (node >= 0) {
+        if (!fdt_getprop(fdt, node, "phandle", &len)) {
+            ++next_phandle;
+            _FDT(fdt_setprop_cell(fdt, node, "phandle", next_phandle));
+        }
+        node = fdt_next_node(fdt, node, NULL);
+    }
+
+    vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base);
+}
+
+/* QOM interface type whose class (VofMachineIfClass) carries the machine hooks. */
+static const TypeInfo vof_machine_if_info = {
+ .name = TYPE_VOF_MACHINE_IF,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(VofMachineIfClass),
+};
+
+/* Register the interface type; hooked up through type_init() below. */
+static void vof_machine_if_register_types(void)
+{
+ type_register_static(&vof_machine_if_info);
+}
+type_init(vof_machine_if_register_types)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ae5654fcdb..3f0d111360 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -36,6 +36,7 @@
#include "qemu/range.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
#include "trace.h"
#include "qapi/error.h"
#include "migration/migration.h"
@@ -134,6 +135,29 @@ static const char *index_to_str(VFIODevice *vbasedev, int index)
}
}
+/*
+ * Enable/disable (@state) the RAM block discard restriction that matches the
+ * container's IOMMU type: only uncoordinated discards are affected for the
+ * type1 IOMMUs, all discards for every other type.
+ *
+ * Returns the result of the underlying ram_block_*_discard_disable() call.
+ */
+static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
+{
+ switch (container->iommu_type) {
+ case VFIO_TYPE1v2_IOMMU:
+ case VFIO_TYPE1_IOMMU:
+ /*
+ * We support coordinated discarding of RAM via the RamDiscardManager.
+ */
+ return ram_block_uncoordinated_discard_disable(state);
+ default:
+ /*
+ * VFIO_SPAPR_TCE_IOMMU most probably works just fine with
+ * RamDiscardManager, however, it is completely untested.
+ *
+ * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does
+ * completely the opposite of managing mapping/pinning dynamically as
+ * required by RamDiscardManager. We would have to special-case sections
+ * with a RamDiscardManager.
+ */
+ return ram_block_discard_disable(state);
+ }
+}
+
int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
int action, int fd, Error **errp)
{
@@ -569,6 +593,44 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
error_report("iommu map to non memory area %"HWADDR_PRIx"",
xlat);
return false;
+ } else if (memory_region_has_ram_discard_manager(mr)) {
+ RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
+ MemoryRegionSection tmp = {
+ .mr = mr,
+ .offset_within_region = xlat,
+ .size = int128_make64(len),
+ };
+
+ /*
+ * Malicious VMs can map memory into the IOMMU, which is expected
+ * to remain discarded. vfio will pin all pages, populating memory.
+ * Disallow that. vmstate priorities make sure any RamDiscardManager
+ * were already restored before IOMMUs are restored.
+ */
+ if (!ram_discard_manager_is_populated(rdm, &tmp)) {
+ error_report("iommu map to discarded memory (e.g., unplugged via"
+ " virtio-mem): %"HWADDR_PRIx"",
+ iotlb->translated_addr);
+ return false;
+ }
+
+ /*
+ * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
+ * pages will remain pinned inside vfio until unmapped, resulting in a
+ * higher memory consumption than expected. If memory would get
+ * populated again later, there would be an inconsistency between pages
+ * pinned by vfio and pages seen by QEMU. This is the case until
+ * unmapped from the IOMMU (e.g., during device reset).
+ *
+ * With malicious guests, we really only care about pinning more memory
+ * than expected. RLIMIT_MEMLOCK set for the user/process can never be
+ * exceeded and can be used to mitigate this problem.
+ */
+ warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
+ " RAM (e.g., virtio-mem) works, however, malicious"
+ " guests can trigger pinning of more memory than"
+ " intended via an IOMMU. It's possible to mitigate "
+ " by setting/adjusting RLIMIT_MEMLOCK.");
}
/*
@@ -649,6 +711,153 @@ out:
rcu_read_unlock();
}
+/*
+ * RamDiscardListener "discard" callback: remove the DMA mapping that covers
+ * @section.  A failure is reported but cannot be propagated to the caller.
+ */
+static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
+                                            MemoryRegionSection *section)
+{
+    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
+                                                listener);
+    int rc;
+
+    /* Unmap with a single call. */
+    rc = vfio_dma_unmap(vrdl->container, section->offset_within_address_space,
+                        int128_get64(section->size), NULL);
+    if (rc == 0) {
+        return;
+    }
+
+    error_report("%s: vfio_dma_unmap() failed: %s", __func__,
+                 strerror(-rc));
+}
+
+/*
+ * RamDiscardListener "populate" callback: DMA-map @section.
+ *
+ * The section is mapped piecewise, each piece ending at the next multiple of
+ * vrdl->granularity (within the memory region) or at the end of the section.
+ * If any piece fails to map, the whole section is unmapped again through
+ * vfio_ram_discard_notify_discard() and the vfio_dma_map() error is
+ * returned.  Returns 0 on success.
+ */
+static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
+ MemoryRegionSection *section)
+{
+ VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
+ listener);
+ const hwaddr end = section->offset_within_region +
+ int128_get64(section->size);
+ hwaddr start, next, iova;
+ void *vaddr;
+ int ret;
+
+ /*
+ * Map in (aligned within memory region) minimum granularity, so we can
+ * unmap in minimum granularity later.
+ */
+ for (start = section->offset_within_region; start < end; start = next) {
+ /* start + 1 guarantees progress when start is already aligned. */
+ next = ROUND_UP(start + 1, vrdl->granularity);
+ next = MIN(next, end);
+
+ /* Translate the region offset into the address space of the section. */
+ iova = start - section->offset_within_region +
+ section->offset_within_address_space;
+ vaddr = memory_region_get_ram_ptr(section->mr) + start;
+
+ ret = vfio_dma_map(vrdl->container, iova, next - start,
+ vaddr, section->readonly);
+ if (ret) {
+ /* Rollback */
+ vfio_ram_discard_notify_discard(rdl, section);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Start tracking @section, whose memory region has a RamDiscardManager.
+ *
+ * A VFIORamDiscardListener is allocated for the section, registered with
+ * the manager and linked into container->vrdl_list.  If the container
+ * reports a DMA mapping limit, a warning is printed when the estimated
+ * worst-case number of mappings could exceed it.
+ */
+static void vfio_register_ram_discard_listener(VFIOContainer *container,
+                                               MemoryRegionSection *section)
+{
+    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+    VFIORamDiscardListener *vrdl;
+
+    /* Ignore some corner cases not relevant in practice. */
+    g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE));
+    g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space,
+                             TARGET_PAGE_SIZE));
+    g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE));
+
+    vrdl = g_new0(VFIORamDiscardListener, 1);
+    vrdl->container = container;
+    vrdl->mr = section->mr;
+    vrdl->offset_within_address_space = section->offset_within_address_space;
+    vrdl->size = int128_get64(section->size);
+    vrdl->granularity = ram_discard_manager_get_min_granularity(rdm,
+                                                                section->mr);
+
+    g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity));
+    /* 1ULL: shifting a plain int by 31 or more bits would be undefined. */
+    g_assert(vrdl->granularity >= 1ULL << ctz64(container->pgsizes));
+
+    ram_discard_listener_init(&vrdl->listener,
+                              vfio_ram_discard_notify_populate,
+                              vfio_ram_discard_notify_discard, true);
+    ram_discard_manager_register_listener(rdm, &vrdl->listener, section);
+    QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next);
+
+    /*
+     * Sanity-check if we have a theoretically problematic setup where we could
+     * exceed the maximum number of possible DMA mappings over time. We assume
+     * that each mapped section in the same address space as a RamDiscardManager
+     * section consumes exactly one DMA mapping, with the exception of
+     * RamDiscardManager sections; i.e., we don't expect to have gIOMMU sections
+     * in the same address space as RamDiscardManager sections.
+     *
+     * We assume that each section in the address space consumes one memslot.
+     * We take the number of KVM memory slots as a best guess for the maximum
+     * number of sections in the address space we could have over time,
+     * also consuming DMA mappings.
+     */
+    if (container->dma_max_mappings) {
+        unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512;
+        VFIORamDiscardListener *it;
+
+#ifdef CONFIG_KVM
+        if (kvm_enabled()) {
+            max_memslots = kvm_get_max_memslots();
+        }
+#endif
+
+        /* Worst case: every granule of every tracked section is mapped. */
+        QLIST_FOREACH(it, &container->vrdl_list, next) {
+            hwaddr start, end;
+
+            start = QEMU_ALIGN_DOWN(it->offset_within_address_space,
+                                    it->granularity);
+            end = ROUND_UP(it->offset_within_address_space + it->size,
+                           it->granularity);
+            vrdl_mappings += (end - start) / it->granularity;
+            vrdl_count++;
+        }
+
+        if (vrdl_mappings + max_memslots - vrdl_count >
+            container->dma_max_mappings) {
+            warn_report("%s: possibly running out of DMA mappings. E.g., try"
+                        " increasing the 'block-size' of virtio-mem devices."
+                        " Maximum possible DMA mappings: %d, Maximum possible"
+                        " memslots: %d", __func__, container->dma_max_mappings,
+                        max_memslots);
+        }
+    }
+}
+
+/*
+ * Stop tracking @section: find the listener registered for the same memory
+ * region and address-space offset, unregister it from the RamDiscardManager,
+ * unlink it from the container and free it.  A missing listener is fatal.
+ */
+static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
+                                                 MemoryRegionSection *section)
+{
+    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+    VFIORamDiscardListener *vrdl = NULL;
+
+    QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
+        if (vrdl->mr != section->mr) {
+            continue;
+        }
+        if (vrdl->offset_within_address_space ==
+            section->offset_within_address_space) {
+            break;
+        }
+    }
+
+    if (vrdl == NULL) {
+        hw_error("vfio: Trying to unregister missing RAM discard listener");
+    }
+
+    ram_discard_manager_unregister_listener(rdm, &vrdl->listener);
+    QLIST_REMOVE(vrdl, next);
+    g_free(vrdl);
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -810,6 +1019,16 @@ static void vfio_listener_region_add(MemoryListener *listener,
/* Here we assume that memory_region_is_ram(section->mr)==true */
+ /*
+ * For RAM memory regions with a RamDiscardManager, we only want to map the
+ * actually populated parts - and update the mapping whenever we're notified
+ * about changes.
+ */
+ if (memory_region_has_ram_discard_manager(section->mr)) {
+ vfio_register_ram_discard_listener(container, section);
+ return;
+ }
+
vaddr = memory_region_get_ram_ptr(section->mr) +
section->offset_within_region +
(iova - section->offset_within_address_space);
@@ -947,6 +1166,10 @@ static void vfio_listener_region_del(MemoryListener *listener,
pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
+ } else if (memory_region_has_ram_discard_manager(section->mr)) {
+ vfio_unregister_ram_discard_listener(container, section);
+ /* Unregistering will trigger an unmap. */
+ try_unmap = false;
}
if (try_unmap) {
@@ -1108,6 +1331,49 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
rcu_read_unlock();
}
+/*
+ * Replay callback: fetch the dirty bitmap for @section, using the container
+ * of the VFIORamDiscardListener passed as @opaque.
+ */
+static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section,
+                                             void *opaque)
+{
+    VFIORamDiscardListener *vrdl = opaque;
+    ram_addr_t ram_addr;
+
+    ram_addr = memory_region_get_ram_addr(section->mr) +
+               section->offset_within_region;
+
+    /*
+     * Sync the whole mapped region (spanning multiple individual mappings)
+     * in one go.
+     */
+    return vfio_get_dirty_bitmap(vrdl->container,
+                                 section->offset_within_address_space,
+                                 int128_get64(section->size), ram_addr);
+}
+
+/*
+ * Sync the dirty bitmap for a section managed by a RamDiscardManager.
+ *
+ * Looks up the listener registered for @section (same memory region and
+ * address-space offset; a missing one is fatal) and replays all populated
+ * parts of the section through vfio_ram_discard_get_dirty_bitmap().
+ *
+ * Returns the result of ram_discard_manager_replay_populated().
+ */
+static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
+                                                   MemoryRegionSection *section)
+{
+    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+    VFIORamDiscardListener *vrdl = NULL;
+
+    QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
+        if (vrdl->mr == section->mr &&
+            vrdl->offset_within_address_space ==
+            section->offset_within_address_space) {
+            break;
+        }
+    }
+
+    if (!vrdl) {
+        hw_error("vfio: Trying to sync missing RAM discard listener");
+    }
+
+    /*
+     * We only want/can synchronize the bitmap for actually mapped parts -
+     * which correspond to populated parts. Replay all populated parts.
+     *
+     * The callback casts its opaque argument to VFIORamDiscardListener *,
+     * so the listener itself must be passed, not the address of the local
+     * pointer.
+     */
+    return ram_discard_manager_replay_populated(rdm, section,
+                                              vfio_ram_discard_get_dirty_bitmap,
+                                                vrdl);
+}
+
static int vfio_sync_dirty_bitmap(VFIOContainer *container,
MemoryRegionSection *section)
{
@@ -1139,6 +1405,8 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
}
}
return 0;
+ } else if (memory_region_has_ram_discard_manager(section->mr)) {
+ return vfio_sync_ram_discard_listener_dirty_bitmap(container, section);
}
ram_addr = memory_region_get_ram_addr(section->mr) +
@@ -1732,15 +2000,25 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
* new memory, it will not yet set ram_block_discard_set_required() and
* therefore, neither stops us here or deals with the sudden memory
* consumption of inflated memory.
+ *
+ * We do support discarding of memory coordinated via the RamDiscardManager
+ * with some IOMMU types. vfio_ram_block_discard_disable() handles the
+ * details once we know which type of IOMMU we are using.
*/
- ret = ram_block_discard_disable(true);
- if (ret) {
- error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
- return ret;
- }
QLIST_FOREACH(container, &space->containers, next) {
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+ ret = vfio_ram_block_discard_disable(container, true);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "Cannot set discarding of RAM broken");
+ if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER,
+ &container->fd)) {
+ error_report("vfio: error disconnecting group %d from"
+ " container", group->groupid);
+ }
+ return ret;
+ }
group->container = container;
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
vfio_kvm_device_add_group(group);
@@ -1768,14 +2046,22 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
container->fd = fd;
container->error = NULL;
container->dirty_pages_supported = false;
+ container->dma_max_mappings = 0;
QLIST_INIT(&container->giommu_list);
QLIST_INIT(&container->hostwin_list);
+ QLIST_INIT(&container->vrdl_list);
ret = vfio_init_container(container, group->fd, errp);
if (ret) {
goto free_container_exit;
}
+ ret = vfio_ram_block_discard_disable(container, true);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
+ goto free_container_exit;
+ }
+
switch (container->iommu_type) {
case VFIO_TYPE1v2_IOMMU:
case VFIO_TYPE1_IOMMU:
@@ -1798,7 +2084,10 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes);
container->pgsizes = info->iova_pgsizes;
+ /* The default in the kernel ("dma_entry_limit") is 65535. */
+ container->dma_max_mappings = 65535;
if (!ret) {
+ vfio_get_info_dma_avail(info, &container->dma_max_mappings);
vfio_get_iommu_info_migration(container, info);
}
g_free(info);
@@ -1820,7 +2109,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
if (ret) {
error_setg_errno(errp, errno, "failed to enable container");
ret = -errno;
- goto free_container_exit;
+ goto enable_discards_exit;
}
} else {
container->prereg_listener = vfio_prereg_listener;
@@ -1832,7 +2121,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
ret = -1;
error_propagate_prepend(errp, container->error,
"RAM memory listener initialization failed: ");
- goto free_container_exit;
+ goto enable_discards_exit;
}
}
@@ -1845,7 +2134,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
if (v2) {
memory_listener_unregister(&container->prereg_listener);
}
- goto free_container_exit;
+ goto enable_discards_exit;
}
if (v2) {
@@ -1860,7 +2149,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
if (ret) {
error_setg_errno(errp, -ret,
"failed to remove existing window");
- goto free_container_exit;
+ goto enable_discards_exit;
}
} else {
/* The default table uses 4K pages */
@@ -1901,6 +2190,9 @@ listener_release_exit:
vfio_kvm_device_del_group(group);
vfio_listener_release(container);
+enable_discards_exit:
+ vfio_ram_block_discard_disable(container, false);
+
free_container_exit:
g_free(container);
@@ -1908,7 +2200,6 @@ close_fd_exit:
close(fd);
put_space_exit:
- ram_block_discard_disable(false);
vfio_put_address_space(space);
return ret;
@@ -2030,7 +2321,7 @@ void vfio_put_group(VFIOGroup *group)
}
if (!group->ram_block_discard_allowed) {
- ram_block_discard_disable(false);
+ vfio_ram_block_discard_disable(group->container, false);
}
vfio_kvm_device_del_group(group);
vfio_disconnect_container(group);
@@ -2084,7 +2375,7 @@ int vfio_get_device(VFIOGroup *group, const char *name,
if (!group->ram_block_discard_allowed) {
group->ram_block_discard_allowed = true;
- ram_block_discard_disable(false);
+ vfio_ram_block_discard_disable(group->container, false);
}
}
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 1ac4a2ebec..29ea2b4fce 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -1913,7 +1913,10 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
if (err < 0) {
return -EPROTO;
}
+ } else {
+ dev->max_queues = 1;
}
+
if (dev->num_queues && dev->max_queues < dev->num_queues) {
error_setg(errp, "The maximum number of queues supported by the "
"backend is %" PRIu64, dev->max_queues);
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 75aa7d6f1b..df91e454b2 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -145,7 +145,173 @@ static bool virtio_mem_is_busy(void)
return migration_in_incoming_postcopy() || !migration_is_idle();
}
-static bool virtio_mem_test_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
+/*
+ * Callback invoked per range (@offset, @size) with the caller's @arg; a
+ * non-zero return value stops virtio_mem_for_each_unplugged_range().
+ */
+typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
+ uint64_t offset, uint64_t size);
+
+/*
+ * Call @cb for every maximal run of clear bits in vmem->bitmap, passing the
+ * run as an (offset, size) pair in bytes (bit index and run length scaled by
+ * vmem->block_size).
+ *
+ * Stops at the first non-zero return value of @cb and returns it; returns 0
+ * if all callbacks succeeded or there was no clear bit.
+ */
+static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
+ virtio_mem_range_cb cb)
+{
+ unsigned long first_zero_bit, last_zero_bit;
+ uint64_t offset, size;
+ int ret = 0;
+
+ first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
+ while (first_zero_bit < vmem->bitmap_size) {
+ offset = first_zero_bit * vmem->block_size;
+ /* The run ends right before the next set bit. */
+ last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
+ first_zero_bit + 1) - 1;
+ size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
+
+ ret = cb(vmem, arg, offset, size);
+ if (ret) {
+ break;
+ }
+ /* Bit last_zero_bit + 1 is set (or past the end), so resume at + 2. */
+ first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
+ last_zero_bit + 2);
+ }
+ return ret;
+}
+
+/*
+ * Shrink memory section @s to its overlap with the region-relative range
+ * (@offset, @size), adjusting the address-space offset accordingly.
+ *
+ * Returns true if there is an overlap; returns false and leaves @s untouched
+ * if the intersection is empty.
+ */
+static bool virito_mem_intersect_memory_section(MemoryRegionSection *s,
+                                                uint64_t offset, uint64_t size)
+{
+    const uint64_t sec_start = s->offset_within_region;
+    const uint64_t sec_end = sec_start + int128_get64(s->size);
+    const uint64_t lo = MAX(sec_start, offset);
+    const uint64_t hi = MIN(sec_end, offset + size);
+
+    if (hi <= lo) {
+        return false;
+    }
+
+    s->offset_within_address_space += lo - sec_start;
+    s->offset_within_region = lo;
+    s->size = int128_make64(hi - lo);
+    return true;
+}
+
+/*
+ * Callback invoked per memory section with the caller's @arg; a non-zero
+ * return value stops virtio_mem_for_each_plugged_section().
+ */
+typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
+
+/*
+ * Call @cb for every plugged part of section @s.
+ *
+ * Starting at the block that contains the beginning of @s, each maximal run
+ * of set bits in vmem->bitmap is converted to a byte range, clipped to @s
+ * and handed to @cb as a temporary MemoryRegionSection.  Iteration stops
+ * when a run no longer intersects @s or @cb returns non-zero.
+ *
+ * Returns the first non-zero value returned by @cb, 0 otherwise.
+ */
+static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
+                                               MemoryRegionSection *s,
+                                               void *arg,
+                                               virtio_mem_section_cb cb)
+{
+    unsigned long first_bit, last_bit;
+    uint64_t offset, size;
+    int ret = 0;
+
+    /*
+     * One bitmap bit stands for block_size bytes (see the offset and size
+     * computations below), so the byte offset is converted to a bit index
+     * with block_size, not with the number of bits in the bitmap.
+     */
+    first_bit = s->offset_within_region / vmem->block_size;
+    first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
+    while (first_bit < vmem->bitmap_size) {
+        MemoryRegionSection tmp = *s;
+
+        offset = first_bit * vmem->block_size;
+        /* The run ends right before the next clear bit. */
+        last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
+                                      first_bit + 1) - 1;
+        size = (last_bit - first_bit + 1) * vmem->block_size;
+
+        if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+            break;
+        }
+        ret = cb(&tmp, arg);
+        if (ret) {
+            break;
+        }
+        /* Bit last_bit + 1 is clear (or past the end), so resume at + 2. */
+        first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
+                                  last_bit + 2);
+    }
+    return ret;
+}
+
+/*
+ * virtio_mem_section_cb adapter: @arg is the RamDiscardListener whose
+ * notify_populate() is called for section @s; its result is returned.
+ */
+static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
+{
+ RamDiscardListener *rdl = arg;
+
+ return rdl->notify_populate(rdl, s);
+}
+
+/*
+ * virtio_mem_section_cb adapter: @arg is the RamDiscardListener whose
+ * notify_discard() is called for section @s.  Always returns 0, so the
+ * iteration is never stopped by this callback.
+ */
+static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
+{
+ RamDiscardListener *rdl = arg;
+
+ rdl->notify_discard(rdl, s);
+ return 0;
+}
+
+/*
+ * Tell every registered listener whose section intersects the range
+ * (@offset, @size) that the intersecting part got discarded.
+ */
+static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
+                                     uint64_t size)
+{
+    RamDiscardListener *listener;
+
+    QLIST_FOREACH(listener, &vmem->rdl_list, next) {
+        MemoryRegionSection clipped = *listener->section;
+
+        if (virito_mem_intersect_memory_section(&clipped, offset, size)) {
+            listener->notify_discard(listener, &clipped);
+        }
+    }
+}
+
+/*
+ * Tell every registered listener whose section intersects the range
+ * (@offset, @size) that the intersecting part got populated.
+ *
+ * If a listener fails, all listeners notified before it get a matching
+ * discard notification for their own intersection, and the error is
+ * returned.  Returns 0 if every listener accepted the range.
+ */
+static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
+                                  uint64_t size)
+{
+    RamDiscardListener *rdl, *rdl2;
+    int ret = 0;
+
+    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+        MemoryRegionSection tmp = *rdl->section;
+
+        if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+            continue;
+        }
+        ret = rdl->notify_populate(rdl, &tmp);
+        if (ret) {
+            break;
+        }
+    }
+
+    if (ret) {
+        /* Notify all already-notified listeners. */
+        QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
+            /*
+             * Roll back with the section of the listener being rolled back
+             * (rdl2), not the section of the listener that failed (rdl).
+             */
+            MemoryRegionSection tmp = *rdl2->section;
+
+            if (rdl2 == rdl) {
+                break;
+            }
+            if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+                continue;
+            }
+            rdl2->notify_discard(rdl2, &tmp);
+        }
+    }
+    return ret;
+}
+
+/*
+ * Send discard notifications covering everything that is currently plugged.
+ *
+ * Listeners that tolerate double discards get one notification for their
+ * whole section; the others are notified per plugged part only.  Nothing is
+ * done when vmem->size is 0.
+ */
+static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
+{
+    RamDiscardListener *listener;
+
+    if (vmem->size == 0) {
+        return;
+    }
+
+    QLIST_FOREACH(listener, &vmem->rdl_list, next) {
+        if (!listener->double_discard_supported) {
+            virtio_mem_for_each_plugged_section(vmem, listener->section,
+                                                listener,
+                                                virtio_mem_notify_discard_cb);
+            continue;
+        }
+        listener->notify_discard(listener, listener->section);
+    }
+}
+
+static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
uint64_t size, bool plugged)
{
const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
@@ -198,7 +364,8 @@ static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
virtio_mem_send_response(vmem, elem, &resp);
}
-static bool virtio_mem_valid_range(VirtIOMEM *vmem, uint64_t gpa, uint64_t size)
+static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
+ uint64_t size)
{
if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
return false;
@@ -219,19 +386,21 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
uint64_t size, bool plug)
{
const uint64_t offset = start_gpa - vmem->addr;
- int ret;
+ RAMBlock *rb = vmem->memdev->mr.ram_block;
if (virtio_mem_is_busy()) {
return -EBUSY;
}
if (!plug) {
- ret = ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
- if (ret) {
- error_report("Unexpected error discarding RAM: %s",
- strerror(-ret));
+ if (ram_block_discard_range(rb, offset, size)) {
return -EBUSY;
}
+ virtio_mem_notify_unplug(vmem, offset, size);
+ } else if (virtio_mem_notify_plug(vmem, offset, size)) {
+ /* Could be a mapping attempt resulted in memory getting populated. */
+ ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
+ return -EBUSY;
}
virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
return 0;
@@ -318,17 +487,16 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
static int virtio_mem_unplug_all(VirtIOMEM *vmem)
{
RAMBlock *rb = vmem->memdev->mr.ram_block;
- int ret;
if (virtio_mem_is_busy()) {
return -EBUSY;
}
- ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
- if (ret) {
- error_report("Unexpected error discarding RAM: %s", strerror(-ret));
+ if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
return -EBUSY;
}
+ virtio_mem_notify_unplug_all(vmem);
+
bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
if (vmem->size) {
vmem->size = 0;
@@ -551,7 +719,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
return;
}
- if (ram_block_discard_require(true)) {
+ if (ram_block_coordinated_discard_require(true)) {
error_setg(errp, "Discarding RAM is disabled");
return;
}
@@ -559,7 +727,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
if (ret) {
error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
- ram_block_discard_require(false);
+ ram_block_coordinated_discard_require(false);
return;
}
@@ -577,6 +745,13 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
qemu_register_reset(virtio_mem_system_reset, vmem);
precopy_add_notifier(&vmem->precopy_notifier);
+
+ /*
+ * Set ourselves as RamDiscardManager before the plug handler maps the
+ * memory region and exposes it via an address space.
+ */
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr,
+ RAM_DISCARD_MANAGER(vmem));
}
static void virtio_mem_device_unrealize(DeviceState *dev)
@@ -584,6 +759,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOMEM *vmem = VIRTIO_MEM(dev);
+ /*
+ * The unplug handler unmapped the memory region, so it cannot be
+ * found via an address space anymore. Unset ourselves.
+ */
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
precopy_remove_notifier(&vmem->precopy_notifier);
qemu_unregister_reset(virtio_mem_system_reset, vmem);
vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
@@ -591,43 +771,47 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
virtio_del_queue(vdev, 0);
virtio_cleanup(vdev);
g_free(vmem->bitmap);
- ram_block_discard_require(false);
+ ram_block_coordinated_discard_require(false);
}
-static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
+static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
+ uint64_t offset, uint64_t size) /* arg is not referenced by this callback */
{
RAMBlock *rb = vmem->memdev->mr.ram_block;
- unsigned long first_zero_bit, last_zero_bit;
- uint64_t offset, length;
- int ret;
- /* Find consecutive unplugged blocks and discard the consecutive range. */
- first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
- while (first_zero_bit < vmem->bitmap_size) {
- offset = first_zero_bit * vmem->block_size;
- last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
- first_zero_bit + 1) - 1;
- length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
+ return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0; /* any nonzero discard result is reported as -EINVAL */
+}
- ret = ram_block_discard_range(rb, offset, length);
- if (ret) {
- error_report("Unexpected error discarding RAM: %s",
- strerror(-ret));
- return -EINVAL;
- }
- first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
- last_zero_bit + 2);
- }
- return 0;
+static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
+{
+ /* Make sure all memory is really discarded after migration. */
+ return virtio_mem_for_each_unplugged_range(vmem, NULL, /* NULL is presumably forwarded as the callback's arg -- confirm in the iterator */
+ virtio_mem_discard_range_cb);
}
static int virtio_mem_post_load(void *opaque, int version_id)
{
+ VirtIOMEM *vmem = VIRTIO_MEM(opaque);
+ RamDiscardListener *rdl;
+ int ret;
+
+ /*
+ * We started out with all memory discarded and our memory region is mapped
+ * into an address space. Replay, now that we updated the bitmap.
+ */
+ QLIST_FOREACH(rdl, &vmem->rdl_list, next) { /* visits every listener currently on rdl_list */
+ ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+ virtio_mem_notify_populate_cb);
+ if (ret) {
+ return ret; /* first failing replay aborts post_load with that error */
+ }
+ }
+
if (migration_in_incoming_postcopy()) {
return 0;
}
- return virtio_mem_restore_unplugged(VIRTIO_MEM(opaque));
+ return virtio_mem_restore_unplugged(vmem); /* not reached for incoming postcopy because of the early return above */
}
typedef struct VirtIOMEMMigSanityChecks {
@@ -702,6 +886,7 @@ static const VMStateDescription vmstate_virtio_mem_device = {
.name = "virtio-mem-device",
.minimum_version_id = 1,
.version_id = 1,
+ .priority = MIG_PRI_VIRTIO_MEM,
.post_load = virtio_mem_post_load,
.fields = (VMStateField[]) {
VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
@@ -872,28 +1057,19 @@ static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
vmem->block_size = value;
}
-static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem)
+static int virtio_mem_precopy_exclude_range_cb(const VirtIOMEM *vmem, void *arg,
+ uint64_t offset, uint64_t size) /* arg is not referenced by this callback */
{
void * const host = qemu_ram_get_host_addr(vmem->memdev->mr.ram_block);
- unsigned long first_zero_bit, last_zero_bit;
- uint64_t offset, length;
- /*
- * Find consecutive unplugged blocks and exclude them from migration.
- *
- * Note: Blocks cannot get (un)plugged during precopy, no locking needed.
- */
- first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
- while (first_zero_bit < vmem->bitmap_size) {
- offset = first_zero_bit * vmem->block_size;
- last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
- first_zero_bit + 1) - 1;
- length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
+ qemu_guest_free_page_hint(host + offset, size); /* offset is applied to the RAM block's host address */
+ return 0; /* this callback always reports success */
+}
- qemu_guest_free_page_hint(host + offset, length);
- first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
- last_zero_bit + 2);
- }
+static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem)
+{
+ virtio_mem_for_each_unplugged_range(vmem, NULL, /* the iterator's return value is ignored here */
+ virtio_mem_precopy_exclude_range_cb);
}
static int virtio_mem_precopy_notify(NotifierWithReturn *n, void *data)
@@ -918,6 +1094,7 @@ static void virtio_mem_instance_init(Object *obj)
notifier_list_init(&vmem->size_change_notifiers);
vmem->precopy_notifier.notify = virtio_mem_precopy_notify;
+ QLIST_INIT(&vmem->rdl_list);
object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
NULL, NULL, NULL);
@@ -937,11 +1114,107 @@ static Property virtio_mem_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
+static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
+ const MemoryRegion *mr)
+{ /* Installed as rdmc->get_min_granularity: reports the device's block size. */
+ const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+
+ g_assert(mr == &vmem->memdev->mr); /* only the memdev's own region is accepted */
+ return vmem->block_size;
+}
+
+static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
+ const MemoryRegionSection *s)
+{ /* Installed as rdmc->is_populated: checks section s against the device bitmap. */
+ const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+ uint64_t start_gpa = vmem->addr + s->offset_within_region; /* device base address plus offset within the region */
+ uint64_t end_gpa = start_gpa + int128_get64(s->size); /* exclusive end of the section */
+
+ g_assert(s->mr == &vmem->memdev->mr);
+
+ start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size); /* widen outward to whole blocks */
+ end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);
+
+ if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
+ return false; /* ranges rejected by virtio_mem_valid_range() are reported as not populated */
+ }
+
+ return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true); /* plugged == true; presumably requires every block in the range to be plugged -- confirm in virtio_mem_test_bitmap() */
+}
+
+struct VirtIOMEMReplayData { /* Bundles a replay callback and its argument behind a single void * */
+ void *fn; /* cast to ReplayRamPopulate in virtio_mem_rdm_replay_populated_cb() */
+ void *opaque; /* handed to fn as its second argument */
+};
+
+static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
+{ /* Trampoline: arg is a struct VirtIOMEMReplayData; calls its stored callback on s. */
+ struct VirtIOMEMReplayData *data = arg;
+
+ return ((ReplayRamPopulate)data->fn)(s, data->opaque); /* fn is stored as void *, so it is cast back before the call */
+}
+
+static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
+ MemoryRegionSection *s,
+ ReplayRamPopulate replay_fn,
+ void *opaque)
+{ /* Installed as rdmc->replay_populated: runs replay_fn through virtio_mem_for_each_plugged_section(). */
+ const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+ struct VirtIOMEMReplayData data = { /* pairs replay_fn with opaque for the trampoline callback */
+ .fn = replay_fn,
+ .opaque = opaque,
+ };
+
+ g_assert(s->mr == &vmem->memdev->mr); /* only sections of the memdev's own region are accepted */
+ return virtio_mem_for_each_plugged_section(vmem, s, &data,
+ virtio_mem_rdm_replay_populated_cb); /* the iterator's result is returned unchanged */
+}
+
+static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl,
+ MemoryRegionSection *s)
+{ /* Installed as rdmc->register_listener: stores rdl on rdl_list and replays plugged ranges to it. */
+ VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+ int ret;
+
+ g_assert(s->mr == &vmem->memdev->mr);
+ rdl->section = memory_region_section_new_copy(s); /* copy is released in virtio_mem_rdm_unregister_listener() */
+
+ QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
+ ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+ virtio_mem_notify_populate_cb);
+ if (ret) { /* failure is only logged: the function returns void and rdl stays on the list */
+ error_report("%s: Replaying plugged ranges failed: %s", __func__,
+ strerror(-ret));
+ }
+}
+
+static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl)
+{ /* Installed as rdmc->unregister_listener: sends discard notification(s), then drops rdl. */
+ VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+
+ g_assert(rdl->section->mr == &vmem->memdev->mr);
+ if (vmem->size) { /* no notifications are sent when vmem->size is 0 */
+ if (rdl->double_discard_supported) {
+ rdl->notify_discard(rdl, rdl->section); /* one notification covering the listener's whole section */
+ } else {
+ virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+ virtio_mem_notify_discard_cb); /* return value is ignored */
+ }
+ }
+
+ memory_region_section_free_copy(rdl->section); /* releases the copy made in virtio_mem_rdm_register_listener() */
+ rdl->section = NULL;
+ QLIST_REMOVE(rdl, next);
+}
+
static void virtio_mem_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
device_class_set_props(dc, virtio_mem_properties);
dc->vmsd = &vmstate_virtio_mem;
@@ -957,6 +1230,12 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data)
vmc->get_memory_region = virtio_mem_get_memory_region;
vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
+
+ rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
+ rdmc->is_populated = virtio_mem_rdm_is_populated;
+ rdmc->replay_populated = virtio_mem_rdm_replay_populated;
+ rdmc->register_listener = virtio_mem_rdm_register_listener;
+ rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
}
static const TypeInfo virtio_mem_info = {
@@ -966,6 +1245,10 @@ static const TypeInfo virtio_mem_info = {
.instance_init = virtio_mem_instance_init,
.class_init = virtio_mem_class_init,
.class_size = sizeof(VirtIOMEMClass),
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_RAM_DISCARD_MANAGER },
+ { }
+ },
};
static void virtio_register_types(void)