diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2015-03-20 10:37:03 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2015-03-20 10:37:03 +0000 |
commit | 4eef86486d4090d7587e94d3f1a2203b94899989 (patch) | |
tree | d0eed668a15a46c812db9ad4eda03bbd3f8d01d1 | |
parent | e7e9b49f8e9ea4c5c9d07f6d8c9071c64dae816a (diff) | |
parent | 549fc54b8cfe16a475d8f6b8f838e53b45452b4a (diff) |
Merge remote-tracking branch 'remotes/ehabkost/tags/work/numa-verify-cpus-pull-request' into staging
NUMA queue 2015-03-19
# gpg: Signature made Thu Mar 19 19:25:53 2015 GMT using RSA key ID 984DC5A6
# gpg: Can't check signature: public key not found
* remotes/ehabkost/tags/work/numa-verify-cpus-pull-request:
numa: Print warning if no node is assigned to a CPU
pc: fix default VCPU to NUMA node mapping
numa: introduce machine callback for VCPU to node mapping
numa: Reject configuration if CPU appears on multiple nodes
numa: Reject CPU indexes > max_cpus
numa: Fix off-by-one error at MAX_CPUMASK_BITS check
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | hw/i386/pc.c | 9 |
-rw-r--r-- | include/hw/boards.h | 5 |
-rw-r--r-- | include/sysemu/numa.h | 3 |
-rw-r--r-- | numa.c | 73 |
-rw-r--r-- | vl.c | 2 |
5 files changed, 82 insertions, 10 deletions
diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 4b46c299c3..a52d2aff7c 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1851,6 +1851,14 @@ static void pc_machine_initfn(Object *obj) NULL, NULL); } +static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index) +{ + unsigned pkg_id, core_id, smt_id; + x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index, + &pkg_id, &core_id, &smt_id); + return pkg_id; +} + static void pc_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -1859,6 +1867,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->get_hotplug_handler = mc->get_hotplug_handler; mc->get_hotplug_handler = pc_get_hotpug_handler; + mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; hc->plug = pc_machine_device_plug_cb; hc->unplug_request = pc_machine_device_unplug_request_cb; hc->unplug = pc_machine_device_unplug_cb; diff --git a/include/hw/boards.h b/include/hw/boards.h index 1feea2b176..78838d13d4 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -82,6 +82,10 @@ bool machine_mem_merge(MachineState *machine); * of HotplugHandler object, which handles hotplug operation * for a given @dev. It may return NULL if @dev doesn't require * any actions to be performed by hotplug handler. + * @cpu_index_to_socket_id: + * used to provide @cpu_index to socket number mapping, allowing + * a machine to group CPU threads belonging to the same socket/package + * Returns: socket number given cpu_index belongs to. 
*/ struct MachineClass { /*< private >*/ @@ -118,6 +122,7 @@ struct MachineClass { HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); + unsigned (*cpu_index_to_socket_id)(unsigned cpu_index); }; /** diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 5633b856a8..6523b4d7f9 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -6,6 +6,7 @@ #include "qemu/option.h" #include "sysemu/sysemu.h" #include "sysemu/hostmem.h" +#include "hw/boards.h" extern int nb_numa_nodes; /* Number of NUMA nodes */ @@ -16,7 +17,7 @@ typedef struct node_info { bool present; } NodeInfo; extern NodeInfo numa_info[MAX_NODES]; -void parse_numa_opts(void); +void parse_numa_opts(MachineClass *mc); void numa_post_machine_init(void); void query_numa_node_mem(uint64_t node_mem[]); extern QemuOptsList qemu_numa_opts; @@ -76,9 +76,11 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) } for (cpus = node->cpus; cpus; cpus = cpus->next) { - if (cpus->value > MAX_CPUMASK_BITS) { - error_setg(errp, "CPU number %" PRIu16 " is bigger than %d", - cpus->value, MAX_CPUMASK_BITS); + if (cpus->value >= max_cpus) { + error_setg(errp, + "CPU index (%" PRIu16 ")" + " should be smaller than maxcpus (%d)", + cpus->value, max_cpus); return; } bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1); @@ -165,7 +167,52 @@ error: return -1; } -void parse_numa_opts(void) +static char *enumerate_cpus(unsigned long *cpus, int max_cpus) +{ + int cpu; + bool first = true; + GString *s = g_string_new(NULL); + + for (cpu = find_first_bit(cpus, max_cpus); + cpu < max_cpus; + cpu = find_next_bit(cpus, max_cpus, cpu + 1)) { + g_string_append_printf(s, "%s%d", first ? 
"" : " ", cpu); + first = false; + } + return g_string_free(s, FALSE); +} + +static void validate_numa_cpus(void) +{ + int i; + DECLARE_BITMAP(seen_cpus, MAX_CPUMASK_BITS); + + bitmap_zero(seen_cpus, MAX_CPUMASK_BITS); + for (i = 0; i < nb_numa_nodes; i++) { + if (bitmap_intersects(seen_cpus, numa_info[i].node_cpu, + MAX_CPUMASK_BITS)) { + bitmap_and(seen_cpus, seen_cpus, + numa_info[i].node_cpu, MAX_CPUMASK_BITS); + error_report("CPU(s) present in multiple NUMA nodes: %s", + enumerate_cpus(seen_cpus, max_cpus));; + exit(EXIT_FAILURE); + } + bitmap_or(seen_cpus, seen_cpus, + numa_info[i].node_cpu, MAX_CPUMASK_BITS); + } + + if (!bitmap_full(seen_cpus, max_cpus)) { + char *msg; + bitmap_complement(seen_cpus, seen_cpus, max_cpus); + msg = enumerate_cpus(seen_cpus, max_cpus); + error_report("warning: CPU(s) not present in any NUMA nodes: %s", msg); + error_report("warning: All CPU(s) up to maxcpus should be described " + "in NUMA config"); + g_free(msg); + } +} + +void parse_numa_opts(MachineClass *mc) { int i; @@ -233,15 +280,25 @@ void parse_numa_opts(void) break; } } - /* assigning the VCPUs round-robin is easier to implement, guest OSes - * must cope with this anyway, because there are BIOSes out there in - * real machines which also use this scheme. + /* Historically VCPUs were assigned in round-robin order to NUMA + * nodes. However it causes issues with guest not handling it nice + * in case where cores/threads from a multicore CPU appear on + * different nodes. So allow boards to override default distribution + * rule grouping VCPUs by socket so that VCPUs from the same socket + * would be on the same node. 
*/ if (i == nb_numa_nodes) { for (i = 0; i < max_cpus; i++) { - set_bit(i, numa_info[i % nb_numa_nodes].node_cpu); + unsigned node_id = i % nb_numa_nodes; + if (mc->cpu_index_to_socket_id) { + node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes; + } + + set_bit(i, numa_info[node_id].node_cpu); } } + + validate_numa_cpus(); } } @@ -4170,7 +4170,7 @@ int main(int argc, char **argv, char **envp) default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); - parse_numa_opts(); + parse_numa_opts(machine_class); if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) { exit(1); |