diff options
author | Igor Mammedov <imammedo@redhat.com> | 2015-03-19 17:09:21 +0000 |
---|---|---|
committer | Eduardo Habkost <ehabkost@redhat.com> | 2015-03-19 16:12:09 -0300 |
commit | 57924bcd87cb03cc21ebd7efed880d16ca048dce (patch) | |
tree | 6c64141a2db6ef60b6445a204d633dbb7608063a | |
parent | 3ef7197505e483e2f28c5fbd6ed54b4061221200 (diff) |
numa: introduce machine callback for VCPU to node mapping
Current default round-robin way of distributing VCPUs among
NUMA nodes might be wrong in case on multi-core/threads
CPUs. Making guests confused wrt topology where cores from
the same socket are on different nodes.
Allow a machine to override default mapping by providing
MachineClass::cpu_index_to_socket_id()
callback which would allow it group VCPUs from a socket
on the same NUMA node.
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
-rw-r--r-- | include/hw/boards.h | 5 | ||||
-rw-r--r-- | include/sysemu/numa.h | 3 | ||||
-rw-r--r-- | numa.c | 18 | ||||
-rw-r--r-- | vl.c | 2 |
4 files changed, 21 insertions, 7 deletions
diff --git a/include/hw/boards.h b/include/hw/boards.h index 1feea2b176..78838d13d4 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -82,6 +82,10 @@ bool machine_mem_merge(MachineState *machine); * of HotplugHandler object, which handles hotplug operation * for a given @dev. It may return NULL if @dev doesn't require * any actions to be performed by hotplug handler. + * @cpu_index_to_socket_id: + * used to provide @cpu_index to socket number mapping, allowing + * a machine to group CPU threads belonging to the same socket/package + * Returns: socket number given cpu_index belongs to. */ struct MachineClass { /*< private >*/ @@ -118,6 +122,7 @@ struct MachineClass { HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); + unsigned (*cpu_index_to_socket_id)(unsigned cpu_index); }; /** diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 5633b856a8..6523b4d7f9 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -6,6 +6,7 @@ #include "qemu/option.h" #include "sysemu/sysemu.h" #include "sysemu/hostmem.h" +#include "hw/boards.h" extern int nb_numa_nodes; /* Number of NUMA nodes */ @@ -16,7 +17,7 @@ typedef struct node_info { bool present; } NodeInfo; extern NodeInfo numa_info[MAX_NODES]; -void parse_numa_opts(void); +void parse_numa_opts(MachineClass *mc); void numa_post_machine_init(void); void query_numa_node_mem(uint64_t node_mem[]); extern QemuOptsList qemu_numa_opts; @@ -202,7 +202,7 @@ static void validate_numa_cpus(void) } } -void parse_numa_opts(void) +void parse_numa_opts(MachineClass *mc) { int i; @@ -270,13 +270,21 @@ void parse_numa_opts(void) break; } } - /* assigning the VCPUs round-robin is easier to implement, guest OSes - * must cope with this anyway, because there are BIOSes out there in - * real machines which also use this scheme. + /* Historically VCPUs were assigned in round-robin order to NUMA + * nodes. However it causes issues with guest not handling it nice + * in case where cores/threads from a multicore CPU appear on + * different nodes. So allow boards to override default distribution + * rule grouping VCPUs by socket so that VCPUs from the same socket + * would be on the same node. */ if (i == nb_numa_nodes) { for (i = 0; i < max_cpus; i++) { - set_bit(i, numa_info[i % nb_numa_nodes].node_cpu); + unsigned node_id = i % nb_numa_nodes; + if (mc->cpu_index_to_socket_id) { + node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes; + } + + set_bit(i, numa_info[node_id].node_cpu); } } @@ -4170,7 +4170,7 @@ int main(int argc, char **argv, char **envp) default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); - parse_numa_opts(); + parse_numa_opts(machine_class); if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) { exit(1); |