aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marcelo Tosatti <mtosatti@redhat.com> 2010-03-01 20:25:08 -0300
committer Marcelo Tosatti <mtosatti@redhat.com> 2010-03-04 00:28:47 -0300
commit c902760fb25f9c490af01e8f6bccaa8dd71cc224 (patch)
tree ebf40c2fba4b138eee6721d011f9f82040a6e565
parent 60e4c6317b8773d987729401aeca9d8c6b61b05f (diff)
Add option to use file backed guest memory
Port qemu-kvm's -mem-path and -mem-prealloc options. These are useful for
backing guest memory with huge pages via hugetlbfs.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
CC: john cooper <john.cooper@redhat.com>
-rw-r--r-- cpu-all.h 3
-rw-r--r-- exec.c 115
-rw-r--r-- qemu-options.hx 16
-rw-r--r-- vl.c 12
4 files changed, 141 insertions, 5 deletions
diff --git a/cpu-all.h b/cpu-all.h
index 8488bfea9f..9823c24bab 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -847,6 +847,9 @@ extern uint8_t *phys_ram_dirty;
extern ram_addr_t ram_size;
extern ram_addr_t last_ram_offset;
+extern const char *mem_path;
+extern int mem_prealloc;
+
/* physical memory access */
/* MMIO pages are identified by a combination of an IO device index and
diff --git a/exec.c b/exec.c
index 6a3c912b7f..f41518e8a3 100644
--- a/exec.c
+++ b/exec.c
@@ -2529,6 +2529,99 @@ void qemu_flush_coalesced_mmio_buffer(void)
kvm_flush_coalesced_mmio_buffer();
}
+#if defined(__linux__) && !defined(TARGET_S390X)
+
+#include <sys/vfs.h>
+
+#define HUGETLBFS_MAGIC 0x958458f6
+
+/*
+ * Report the block size, as given by statfs(2), of the filesystem that
+ * holds @path.  A warning is printed when that filesystem is not
+ * hugetlbfs, but its block size is returned all the same.
+ *
+ * Returns 0 (after a perror) if statfs() fails for a reason other than
+ * being interrupted.
+ */
+static long gethugepagesize(const char *path)
+{
+    struct statfs sfs;
+
+    /* Keep retrying for as long as the call is merely interrupted. */
+    while (statfs(path, &sfs) != 0) {
+        if (errno != EINTR) {
+            perror("statfs");
+            return 0;
+        }
+    }
+
+    if (sfs.f_type != HUGETLBFS_MAGIC) {
+        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+    }
+
+    return sfs.f_bsize;
+}
+
+/*
+ * Back @memory bytes of guest RAM with a temporary file created in the
+ * directory @path (the commit targets hugetlbfs mounts).
+ *
+ * The file is unlinked right after creation so that no name is left
+ * behind in @path.  The requested size is rounded up to a multiple of
+ * the block size reported by gethugepagesize().
+ *
+ * Returns the address of the new mapping, or NULL on failure; each
+ * failure path handled here prints a diagnostic to stderr first, since
+ * the caller treats NULL as fatal.
+ */
+static void *file_ram_alloc(ram_addr_t memory, const char *path)
+{
+    char *filename;
+    void *area;
+    int fd;
+    int flags = MAP_PRIVATE;
+    unsigned long hpagesize;
+
+    hpagesize = gethugepagesize(path);
+    if (!hpagesize) {
+        return NULL;
+    }
+
+    if (memory < hpagesize) {
+        fprintf(stderr, "file_ram_alloc: requested size is smaller than the "
+                "%lu byte page size of %s\n", hpagesize, path);
+        return NULL;
+    }
+
+    if (kvm_enabled() && !kvm_has_sync_mmu()) {
+        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
+        return NULL;
+    }
+
+    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
+        fprintf(stderr, "file_ram_alloc: can't build backing file name\n");
+        return NULL;
+    }
+
+    fd = mkstemp(filename);
+    if (fd < 0) {
+        perror("mkstemp");
+        free(filename);
+        return NULL;
+    }
+    /* Drop the name at once; from here on only fd refers to the file. */
+    unlink(filename);
+    free(filename);
+
+    /* Round up to whole pages (assumes hpagesize is a power of two). */
+    memory = (memory + hpagesize - 1) & ~(hpagesize - 1);
+
+    /*
+     * ftruncate is not supported by hugetlbfs in older
+     * hosts, so don't bother bailing out on errors.
+     * If anything goes wrong with it under other filesystems,
+     * mmap will fail.
+     */
+    if (ftruncate(fd, memory)) {
+        perror("ftruncate");
+    }
+
+#ifdef MAP_POPULATE
+    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
+     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
+     * to sidestep this quirk.
+     */
+    if (mem_prealloc) {
+        flags = MAP_POPULATE | MAP_SHARED;
+    }
+#endif
+    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
+    if (area == MAP_FAILED) {
+        /* Report before close() so errno still belongs to mmap. */
+        perror("file_ram_alloc: can't mmap RAM pages");
+        close(fd);
+        return NULL;
+    }
+
+    /*
+     * The mapping holds its own reference to the file, so the descriptor
+     * is no longer needed; closing it avoids leaking one fd per RAM block.
+     */
+    close(fd);
+    return area;
+}
+#endif
+
ram_addr_t qemu_ram_alloc(ram_addr_t size)
{
RAMBlock *new_block;
@@ -2536,16 +2629,28 @@ ram_addr_t qemu_ram_alloc(ram_addr_t size)
size = TARGET_PAGE_ALIGN(size);
new_block = qemu_malloc(sizeof(*new_block));
+ if (mem_path) {
+#if defined (__linux__) && !defined(TARGET_S390X)
+ new_block->host = file_ram_alloc(size, mem_path);
+ if (!new_block->host)
+ exit(1);
+#else
+ fprintf(stderr, "-mem-path option unsupported\n");
+ exit(1);
+#endif
+ } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
- /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
- new_block->host = mmap((void*)0x1000000, size, PROT_EXEC|PROT_READ|PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
+ new_block->host = mmap((void*)0x1000000, size,
+ PROT_EXEC|PROT_READ|PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
#else
- new_block->host = qemu_vmalloc(size);
+ new_block->host = qemu_vmalloc(size);
#endif
#ifdef MADV_MERGEABLE
- madvise(new_block->host, size, MADV_MERGEABLE);
+ madvise(new_block->host, size, MADV_MERGEABLE);
#endif
+ }
new_block->offset = last_ram_offset;
new_block->length = size;
diff --git a/qemu-options.hx b/qemu-options.hx
index 7daa246c1f..fd50addb65 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -314,6 +314,22 @@ a suffix of ``M'' or ``G'' can be used to signify a value in megabytes or
gigabytes respectively.
ETEXI
+DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
+ "-mem-path FILE provide backing storage for guest RAM\n")
+STEXI
+@item -mem-path @var{path}
+Allocate guest RAM from a temporarily created file in @var{path}.
+ETEXI
+
+#ifdef MAP_POPULATE
+DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
+ "-mem-prealloc preallocate guest memory (use with -mem-path)\n")
+STEXI
+@item -mem-prealloc
+Preallocate memory when using -mem-path.
+ETEXI
+#endif
+
DEF("k", HAS_ARG, QEMU_OPTION_k,
"-k language use keyboard layout (for example 'fr' for French)\n")
STEXI
diff --git a/vl.c b/vl.c
index db7a178f88..274d847294 100644
--- a/vl.c
+++ b/vl.c
@@ -185,6 +185,10 @@ enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
DisplayType display_type = DT_DEFAULT;
const char* keyboard_layout = NULL;
ram_addr_t ram_size;
+const char *mem_path = NULL;
+#ifdef MAP_POPULATE
+int mem_prealloc = 0; /* force preallocation of physical target memory */
+#endif
int nb_nics;
NICInfo nd_table[MAX_NICS];
int vm_running;
@@ -5216,6 +5220,14 @@ int main(int argc, char **argv, char **envp)
ram_size = value;
break;
}
+ case QEMU_OPTION_mempath:
+ mem_path = optarg;
+ break;
+#ifdef MAP_POPULATE
+ case QEMU_OPTION_mem_prealloc:
+ mem_prealloc = 1;
+ break;
+#endif
case QEMU_OPTION_d:
{
int mask;