aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author: Laurent Vivier <lvivier@redhat.com> 2017-05-02 18:29:55 +0200
committer: Eduardo Habkost <ehabkost@redhat.com> 2017-05-11 16:08:47 -0300
commit: 3bfe57165b4bf86a431099078df422f54598f5c6 (patch)
tree: 268732cbb3c47f666250ab80d2bc2b7aa300d995 /include
parent: 0f203430dd88cc6270310956ace58aca639edb59 (diff)
numa: equally distribute memory on nodes
When there are more nodes than available memory to put the minimum
allowed memory by node, all the memory is put on the last node.

This is because we put

    (ram_size / nb_numa_nodes) & ~((1 << mc->numa_mem_align_shift) - 1);

on each node, and in this case the value is 0.
This is particularly true with pseries, as the memory must be aligned
to 256MB.

To avoid this problem, this patch uses an error diffusion algorithm [1]
to distribute equally the memory on nodes.

We introduce numa_auto_assign_ram() function in MachineClass
to keep compatibility between machine type versions.
The legacy function is used with pseries-2.9, pc-q35-2.9 and
pc-i440fx-2.9 (and previous), the new one with all others.

Example:

    qemu-system-ppc64 -S -nographic -nodefaults -monitor stdio -m 1G -smp 8 \
                      -numa node -numa node -numa node \
                      -numa node -numa node -numa node

Before:

    (qemu) info numa
    6 nodes
    node 0 cpus: 0 6
    node 0 size: 0 MB
    node 1 cpus: 1 7
    node 1 size: 0 MB
    node 2 cpus: 2
    node 2 size: 0 MB
    node 3 cpus: 3
    node 3 size: 0 MB
    node 4 cpus: 4
    node 4 size: 0 MB
    node 5 cpus: 5
    node 5 size: 1024 MB

After:

    (qemu) info numa
    6 nodes
    node 0 cpus: 0 6
    node 0 size: 0 MB
    node 1 cpus: 1 7
    node 1 size: 256 MB
    node 2 cpus: 2
    node 2 size: 0 MB
    node 3 cpus: 3
    node 3 size: 256 MB
    node 4 cpus: 4
    node 4 size: 256 MB
    node 5 cpus: 5
    node 5 size: 256 MB

[1] https://en.wikipedia.org/wiki/Error_diffusion

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170502162955.1610-2-lvivier@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
[ehabkost: s/ram_size/size/ at numa_default_auto_assign_ram()]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Diffstat (limited to 'include')
-rw-r--r-- include/hw/boards.h 2
-rw-r--r-- include/qemu/typedefs.h 1
-rw-r--r-- include/sysemu/numa.h 9
3 files changed, 10 insertions, 2 deletions
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 31d9c72fb0..99458eb859 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -136,6 +136,8 @@ struct MachineClass {
int minimum_page_bits;
bool has_hotpluggable_cpus;
int numa_mem_align_shift;
+ void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes,
+ int nb_nodes, ram_addr_t size);
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
DeviceState *dev);
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index f08d327aec..7d8505730c 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -97,5 +97,6 @@ typedef struct SSIBus SSIBus;
typedef struct uWireSlave uWireSlave;
typedef struct VirtIODevice VirtIODevice;
typedef struct Visitor Visitor;
+typedef struct node_info NodeInfo;
#endif /* QEMU_TYPEDEFS_H */
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 0ea1bc086e..70e56214e5 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -16,14 +16,14 @@ struct numa_addr_range {
QLIST_ENTRY(numa_addr_range) entry;
};
-typedef struct node_info {
+struct node_info {
uint64_t node_mem;
unsigned long *node_cpu;
struct HostMemoryBackend *node_memdev;
bool present;
QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */
uint8_t distance[MAX_NODES];
-} NodeInfo;
+};
extern NodeInfo numa_info[MAX_NODES];
void parse_numa_opts(MachineClass *mc);
@@ -33,6 +33,11 @@ extern QemuOptsList qemu_numa_opts;
void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
uint32_t numa_get_node(ram_addr_t addr, Error **errp);
+void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
+ int nb_nodes, ram_addr_t size);
+void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
+ int nb_nodes, ram_addr_t size);
+
/* on success returns node index in numa_info,
* on failure returns nb_numa_nodes */