diff options
-rw-r--r-- | default-configs/i386-softmmu.mak | 1 | ||||
-rw-r--r-- | default-configs/x86_64-softmmu.mak | 1 | ||||
-rw-r--r-- | hw/acpi/Makefile.objs | 1 | ||||
-rw-r--r-- | hw/acpi/nvdimm.c | 382 | ||||
-rw-r--r-- | hw/i386/acpi-build.c | 12 | ||||
-rw-r--r-- | hw/i386/pc.c | 19 | ||||
-rw-r--r-- | include/hw/i386/pc.h | 2 | ||||
-rw-r--r-- | include/hw/mem/nvdimm.h | 3 | ||||
-rw-r--r-- | qemu-options.hx | 5 |
9 files changed, 425 insertions, 1 deletions
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index b0355993e1..b177e52104 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -52,6 +52,7 @@ CONFIG_IOAPIC=y CONFIG_PVPANIC=y CONFIG_MEM_HOTPLUG=y CONFIG_NVDIMM=y +CONFIG_ACPI_NVDIMM=y CONFIG_XIO3130=y CONFIG_IOH3420=y CONFIG_I82801B11=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 27669bec58..6e3b312c5f 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -52,6 +52,7 @@ CONFIG_IOAPIC=y CONFIG_PVPANIC=y CONFIG_MEM_HOTPLUG=y CONFIG_NVDIMM=y +CONFIG_ACPI_NVDIMM=y CONFIG_XIO3130=y CONFIG_IOH3420=y CONFIG_I82801B11=y diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 7d3230c2a5..095597f791 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -2,6 +2,7 @@ common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o pcihp.o common-obj-$(CONFIG_ACPI_X86_ICH) += ich9.o tco.o common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o +common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o common-obj-$(CONFIG_ACPI) += acpi_interface.o common-obj-$(CONFIG_ACPI) += bios-linker-loader.o common-obj-$(CONFIG_ACPI) += aml-build.o diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c new file mode 100644 index 0000000000..98c004d1a5 --- /dev/null +++ b/hw/acpi/nvdimm.c @@ -0,0 +1,382 @@ +/* + * NVDIMM ACPI Implementation + * + * Copyright(C) 2015 Intel Corporation. + * + * Author: + * Xiao Guangrong <guangrong.xiao@linux.intel.com> + * + * NFIT is defined in ACPI 6.0: 5.2.25 NVDIMM Firmware Interface Table (NFIT) + * and the DSM specification can be found at: + * http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf + * + * Currently, it only supports PMEM Virtualization. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/mem/nvdimm.h" + +static int nvdimm_plugged_device_list(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (object_dynamic_cast(obj, TYPE_NVDIMM)) { + DeviceState *dev = DEVICE(obj); + + if (dev->realized) { /* only realized NVDIMMs matter */ + *list = g_slist_append(*list, DEVICE(obj)); + } + } + + object_child_foreach(obj, nvdimm_plugged_device_list, opaque); + return 0; +} + +/* + * inquire plugged NVDIMM devices and link them into the list which is + * returned to the caller. + * + * Note: it is the caller's responsibility to free the list to avoid + * memory leak. + */ +static GSList *nvdimm_get_plugged_device_list(void) +{ + GSList *list = NULL; + + object_child_foreach(qdev_get_machine(), nvdimm_plugged_device_list, + &list); + return list; +} + +#define NVDIMM_UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ + { (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, (c) & 0xff, ((c) >> 8) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) } + +/* + * define Byte Addressable Persistent Memory (PM) Region according to + * ACPI 6.0: 5.2.25.1 System Physical Address Range Structure. 
+ */ +static const uint8_t nvdimm_nfit_spa_uuid[] = + NVDIMM_UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d, 0x33, + 0x18, 0xb7, 0x8c, 0xdb); + +/* + * NVDIMM Firmware Interface Table + * @signature: "NFIT" + * + * It provides information that allows OSPM to enumerate NVDIMM present in + * the platform and associate system physical address ranges created by the + * NVDIMMs. + * + * It is defined in ACPI 6.0: 5.2.25 NVDIMM Firmware Interface Table (NFIT) + */ +struct NvdimmNfitHeader { + ACPI_TABLE_HEADER_DEF + uint32_t reserved; +} QEMU_PACKED; +typedef struct NvdimmNfitHeader NvdimmNfitHeader; + +/* + * define NFIT structures according to ACPI 6.0: 5.2.25 NVDIMM Firmware + * Interface Table (NFIT). + */ + +/* + * System Physical Address Range Structure + * + * It describes the system physical address ranges occupied by NVDIMMs and + * the types of the regions. + */ +struct NvdimmNfitSpa { + uint16_t type; + uint16_t length; + uint16_t spa_index; + uint16_t flags; + uint32_t reserved; + uint32_t proximity_domain; + uint8_t type_guid[16]; + uint64_t spa_base; + uint64_t spa_length; + uint64_t mem_attr; +} QEMU_PACKED; +typedef struct NvdimmNfitSpa NvdimmNfitSpa; + +/* + * Memory Device to System Physical Address Range Mapping Structure + * + * It enables identifying each NVDIMM region and the corresponding SPA + * describing the memory interleave + */ +struct NvdimmNfitMemDev { + uint16_t type; + uint16_t length; + uint32_t nfit_handle; + uint16_t phys_id; + uint16_t region_id; + uint16_t spa_index; + uint16_t dcr_index; + uint64_t region_len; + uint64_t region_offset; + uint64_t region_dpa; + uint16_t interleave_index; + uint16_t interleave_ways; + uint16_t flags; + uint16_t reserved; +} QEMU_PACKED; +typedef struct NvdimmNfitMemDev NvdimmNfitMemDev; + +/* + * NVDIMM Control Region Structure + * + * It describes the NVDIMM and if applicable, Block Control Window. 
+ */ +struct NvdimmNfitControlRegion { + uint16_t type; + uint16_t length; + uint16_t dcr_index; + uint16_t vendor_id; + uint16_t device_id; + uint16_t revision_id; + uint16_t sub_vendor_id; + uint16_t sub_device_id; + uint16_t sub_revision_id; + uint8_t reserved[6]; + uint32_t serial_number; + uint16_t fic; + uint16_t num_bcw; + uint64_t bcw_size; + uint64_t cmd_offset; + uint64_t cmd_size; + uint64_t status_offset; + uint64_t status_size; + uint16_t flags; + uint8_t reserved2[6]; +} QEMU_PACKED; +typedef struct NvdimmNfitControlRegion NvdimmNfitControlRegion; + +/* + * Module serial number is a unique number for each device. We use the + * slot id of NVDIMM device to generate this number so that each device + * associates with a different number. + * + * 0x123456 is a magic number we arbitrarily chose. + */ +static uint32_t nvdimm_slot_to_sn(int slot) +{ + return 0x123456 + slot; +} + +/* + * handle is used to uniquely associate nfit_memdev structure with NVDIMM + * ACPI device - nfit_memdev.nfit_handle matches with the value returned + * by ACPI device _ADR method. + * + * We generate the handle with the slot id of NVDIMM device and reserve + * 0 for NVDIMM root device. + */ +static uint32_t nvdimm_slot_to_handle(int slot) +{ + return slot + 1; +} + +/* + * index uniquely identifies the structure, 0 is reserved which indicates + * that the structure is not valid or the associated structure is not + * present. + * + * Each NVDIMM device needs two indexes, one for nfit_spa and another for + * nfit_dc which are generated by the slot id of NVDIMM device. + */ +static uint16_t nvdimm_slot_to_spa_index(int slot) +{ + return (slot + 1) << 1; +} + +/* See the comments of nvdimm_slot_to_spa_index(). 
*/ +static uint32_t nvdimm_slot_to_dcr_index(int slot) +{ + return nvdimm_slot_to_spa_index(slot) + 1; +} + +/* ACPI 6.0: 5.2.25.1 System Physical Address Range Structure */ +static void +nvdimm_build_structure_spa(GArray *structures, DeviceState *dev) +{ + NvdimmNfitSpa *nfit_spa; + uint64_t addr = object_property_get_int(OBJECT(dev), PC_DIMM_ADDR_PROP, + NULL); + uint64_t size = object_property_get_int(OBJECT(dev), PC_DIMM_SIZE_PROP, + NULL); + uint32_t node = object_property_get_int(OBJECT(dev), PC_DIMM_NODE_PROP, + NULL); + int slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, + NULL); + + nfit_spa = acpi_data_push(structures, sizeof(*nfit_spa)); + + nfit_spa->type = cpu_to_le16(0 /* System Physical Address Range + Structure */); + nfit_spa->length = cpu_to_le16(sizeof(*nfit_spa)); + nfit_spa->spa_index = cpu_to_le16(nvdimm_slot_to_spa_index(slot)); + + /* + * Control region is strict as all the device info, such as SN, index, + * is associated with slot id. + */ + nfit_spa->flags = cpu_to_le16(1 /* Control region is strictly for + management during hot add/online + operation */ | + 2 /* Data in Proximity Domain field is + valid*/); + + /* NUMA node. */ + nfit_spa->proximity_domain = cpu_to_le32(node); + /* the region reported as PMEM. */ + memcpy(nfit_spa->type_guid, nvdimm_nfit_spa_uuid, + sizeof(nvdimm_nfit_spa_uuid)); + + nfit_spa->spa_base = cpu_to_le64(addr); + nfit_spa->spa_length = cpu_to_le64(size); + + /* It is the PMEM and can be cached as writeback. 
+     */
+    nfit_spa->mem_attr = cpu_to_le64(0x8ULL /* EFI_MEMORY_WB */ |
+                                     0x8000ULL /* EFI_MEMORY_NV */);
+}
+
+/*
+ * ACPI 6.0: 5.2.25.2 Memory Device to System Physical Address Range Mapping
+ * Structure
+ *
+ * Append one mapping structure for @dev to @structures.  The handle and the
+ * SPA / control region indexes are all derived from the slot id of the
+ * device, and the region length is the size property of the device.
+ */
+static void
+nvdimm_build_structure_memdev(GArray *structures, DeviceState *dev)
+{
+    NvdimmNfitMemDev *nfit_memdev;
+    uint64_t size = object_property_get_int(OBJECT(dev), PC_DIMM_SIZE_PROP,
+                                            NULL);
+    int slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP,
+                                       NULL);
+    uint32_t handle = nvdimm_slot_to_handle(slot);
+
+    nfit_memdev = acpi_data_push(structures, sizeof(*nfit_memdev));
+
+    nfit_memdev->type = cpu_to_le16(1 /* Memory Device to System Address
+                                         Range Map Structure*/);
+    nfit_memdev->length = cpu_to_le16(sizeof(*nfit_memdev));
+    nfit_memdev->nfit_handle = cpu_to_le32(handle);
+
+    /*
+     * associate memory device with System Physical Address Range
+     * Structure.
+     */
+    nfit_memdev->spa_index = cpu_to_le16(nvdimm_slot_to_spa_index(slot));
+    /* associate memory device with Control Region Structure. */
+    nfit_memdev->dcr_index = cpu_to_le16(nvdimm_slot_to_dcr_index(slot));
+
+    /* The memory region on the device. */
+    nfit_memdev->region_len = cpu_to_le64(size);
+    /*
+     * region_dpa is an address as seen by the NVDIMM itself, not a system
+     * physical address (that one is reported in the SPA structure).  The
+     * region spans the whole device (region_len == size), so it starts at
+     * device address 0.
+     */
+    nfit_memdev->region_dpa = cpu_to_le64(0);
+
+    /* Only one interleave for PMEM. */
+    nfit_memdev->interleave_ways = cpu_to_le16(1);
+}
+
+/*
+ * ACPI 6.0: 5.2.25.5 NVDIMM Control Region Structure.
+ */
+static void nvdimm_build_structure_dcr(GArray *structures, DeviceState *dev)
+{
+    NvdimmNfitControlRegion *nfit_dcr;
+    int slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP,
+                                       NULL);
+    uint32_t sn = nvdimm_slot_to_sn(slot);
+
+    nfit_dcr = acpi_data_push(structures, sizeof(*nfit_dcr));
+
+    nfit_dcr->type = cpu_to_le16(4 /* NVDIMM Control Region Structure */);
+    nfit_dcr->length = cpu_to_le16(sizeof(*nfit_dcr));
+    nfit_dcr->dcr_index = cpu_to_le16(nvdimm_slot_to_dcr_index(slot));
+
+    /* vendor: Intel. */
+    nfit_dcr->vendor_id = cpu_to_le16(0x8086);
+    nfit_dcr->device_id = cpu_to_le16(1);
+
+    /* The _DSM method is following Intel's DSM specification. */
+    nfit_dcr->revision_id = cpu_to_le16(1 /* Current Revision supported
+                                            in ACPI 6.0 is 1. */);
+    nfit_dcr->serial_number = cpu_to_le32(sn);
+    nfit_dcr->fic = cpu_to_le16(0x201 /* Format Interface Code. See Chapter
+                                         2: NVDIMM Device Specific Method
+                                         (DSM) in DSM Spec Rev1.*/);
+}
+
+static GArray *nvdimm_build_device_structure(GSList *device_list)
+{
+    GArray *structures = g_array_new(false, true /* clear */, 1);
+
+    for (; device_list; device_list = device_list->next) {
+        DeviceState *dev = device_list->data;
+
+        /* build System Physical Address Range Structure. */
+        nvdimm_build_structure_spa(structures, dev);
+
+        /*
+         * build Memory Device to System Physical Address Range Mapping
+         * Structure.
+         */
+        nvdimm_build_structure_memdev(structures, dev);
+
+        /* build NVDIMM Control Region Structure. */
+        nvdimm_build_structure_dcr(structures, dev);
+    }
+
+    return structures;
+}
+
+/*
+ * Build the NFIT for the NVDIMMs in @device_list and append it to
+ * @table_data.
+ *
+ * The table is an NvdimmNfitHeader followed by the structures generated by
+ * nvdimm_build_device_structure().  The temporary structures array is freed
+ * before returning; @device_list stays owned by the caller.
+ */
+static void nvdimm_build_nfit(GSList *device_list, GArray *table_offsets,
+                              GArray *table_data, GArray *linker)
+{
+    GArray *structures = nvdimm_build_device_structure(device_list);
+    unsigned int header;
+
+    acpi_add_table(table_offsets, table_data);
+
+    /*
+     * NFIT header.  Remember where it starts instead of keeping the
+     * pointer returned by acpi_data_push(): appending the structures
+     * below may reallocate table_data->data, which would leave such a
+     * pointer dangling by the time build_header() writes through it.
+     */
+    header = table_data->len;
+    acpi_data_push(table_data, sizeof(NvdimmNfitHeader));
+    /* NVDIMM device structures. */
+    g_array_append_vals(table_data, structures->data, structures->len);
+
+    build_header(linker, table_data,
+                 (void *)(table_data->data + header), "NFIT",
+                 sizeof(NvdimmNfitHeader) + structures->len, 1, NULL);
+    g_array_free(structures, true);
+}
+
+void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data,
+                       GArray *linker)
+{
+    GSList *device_list;
+
+    /* no NVDIMM device is plugged. */
+    device_list = nvdimm_get_plugged_device_list();
+    if (!device_list) {
+        return;
+    }
+    nvdimm_build_nfit(device_list, table_offsets, table_data, linker);
+    g_slist_free(device_list);
+}
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 27d2caf144..479b11ec4d 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -39,6 +39,7 @@
 #include "hw/loader.h"
 #include "hw/isa/isa.h"
 #include "hw/acpi/memory_hotplug.h"
+#include "hw/mem/nvdimm.h"
 #include "sysemu/tpm.h"
 #include "hw/acpi/tpm.h"
 #include "sysemu/tpm_backend.h"
@@ -1683,6 +1684,13 @@ static bool acpi_has_iommu(void)
     return intel_iommu && !ambiguous;
 }
 
+static bool acpi_has_nvdimm(void)
+{
+    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
+
+    return pcms->nvdimm;
+}
+
 static
 void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
 {
@@ -1767,6 +1775,10 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
         build_dmar_q35(tables_blob, tables->linker);
     }
 
+    if (acpi_has_nvdimm()) {
+        nvdimm_build_acpi(table_offsets, tables_blob, tables->linker);
+    }
+
     /* Add tables supplied by user (if any) */
     for (u = acpi_table_first(); u; u = acpi_table_next(u)) {
         unsigned len = acpi_table_len(u);
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 48aa37a138..459260b4af 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1865,6 +1865,20 @@ static void pc_machine_set_smm(Object *obj, Visitor *v, void *opaque,
     visit_type_OnOffAuto(v, &pcms->smm, name, errp);
 }
 
+static bool pc_machine_get_nvdimm(Object *obj, Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+
+    return pcms->nvdimm;
+}
+
+static void 
pc_machine_set_nvdimm(Object *obj, bool value, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->nvdimm = value; +} + static void pc_machine_initfn(Object *obj) { PCMachineState *pcms = PC_MACHINE(obj); @@ -1899,6 +1913,11 @@ static void pc_machine_initfn(Object *obj) object_property_set_description(obj, PC_MACHINE_VMPORT, "Enable vmport (pc & q35)", &error_abort); + + /* nvdimm is disabled on default. */ + pcms->nvdimm = false; + object_property_add_bool(obj, PC_MACHINE_NVDIMM, pc_machine_get_nvdimm, + pc_machine_set_nvdimm, &error_abort); } static void pc_machine_reset(void) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c6a777546a..b0d6283b25 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -55,6 +55,7 @@ struct PCMachineState { uint64_t max_ram_below_4g; OnOffAuto vmport; OnOffAuto smm; + bool nvdimm; /* RAM information (sizes, addresses, configuration): */ ram_addr_t below_4g_mem_size, above_4g_mem_size; @@ -65,6 +66,7 @@ struct PCMachineState { #define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" #define PC_MACHINE_VMPORT "vmport" #define PC_MACHINE_SMM "smm" +#define PC_MACHINE_NVDIMM "nvdimm" /** * PCMachineClass: diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h index dbfa8d6bd4..49183c126b 100644 --- a/include/hw/mem/nvdimm.h +++ b/include/hw/mem/nvdimm.h @@ -26,4 +26,7 @@ #include "hw/mem/pc-dimm.h" #define TYPE_NVDIMM "nvdimm" + +void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, + GArray *linker); #endif diff --git a/qemu-options.hx b/qemu-options.hx index 94ab8806fb..215d00ddd3 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -42,7 +42,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ " igd-passthru=on|off controls IGD GFX passthrough support (default=off)\n" " aes-key-wrap=on|off controls support for AES key wrapping (default=on)\n" " dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n" - " suppress-vmdesc=on|off disables self-describing 
migration (default=off)\n", + " suppress-vmdesc=on|off disables self-describing migration (default=off)\n" + " nvdimm=on|off controls NVDIMM support (default=off)\n", QEMU_ARCH_ALL) STEXI @item -machine [type=]@var{name}[,prop=@var{value}[,...]] @@ -81,6 +82,8 @@ execution of AES cryptographic functions. The default is on. Enables or disables DEA key wrapping support on s390-ccw hosts. This feature controls whether DEA wrapping keys will be created to allow execution of DEA cryptographic functions. The default is on. +@item nvdimm=on|off +Enables or disables NVDIMM support. The default is off. @end table ETEXI |