aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch_init.c65
-rw-r--r--exec.c102
-rw-r--r--include/exec/cpu-all.h6
3 files changed, 121 insertions, 52 deletions
diff --git a/arch_init.c b/arch_init.c
index 1ee2e35352..5fc6fc382c 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -52,6 +52,7 @@
#include "exec/ram_addr.h"
#include "hw/acpi/acpi.h"
#include "qemu/host-utils.h"
+#include "qemu/rcu_queue.h"
#ifdef DEBUG_ARCH_INIT
#define DPRINTF(fmt, ...) \
@@ -523,9 +524,12 @@ static void migration_bitmap_sync(void)
trace_migration_bitmap_sync_start();
address_space_sync_dirty_bitmap(&address_space_memory);
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ rcu_read_lock();
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
}
+ rcu_read_unlock();
+
trace_migration_bitmap_sync_end(migration_dirty_pages
- num_dirty_pages_init);
num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
@@ -648,6 +652,8 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
/*
* ram_find_and_save_block: Finds a page to send and sends it to f
*
+ * Called within an RCU critical section.
+ *
* Returns: The number of bytes written.
* 0 means no dirty pages
*/
@@ -661,7 +667,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
MemoryRegion *mr;
if (!block)
- block = QLIST_FIRST(&ram_list.blocks);
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
while (true) {
mr = block->mr;
@@ -672,9 +678,9 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
}
if (offset >= block->used_length) {
offset = 0;
- block = QLIST_NEXT(block, next);
+ block = QLIST_NEXT_RCU(block, next);
if (!block) {
- block = QLIST_FIRST(&ram_list.blocks);
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
complete_round = true;
ram_bulk_stage = false;
}
@@ -728,10 +734,10 @@ uint64_t ram_bytes_total(void)
RAMBlock *block;
uint64_t total = 0;
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ rcu_read_lock();
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
total += block->used_length;
- }
-
+ rcu_read_unlock();
return total;
}
@@ -777,6 +783,13 @@ static void reset_ram_globals(void)
#define MAX_WAIT 50 /* ms, half buffered_file limit */
+
+/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
+ * long-running RCU critical section. When rcu-reclaims in the code
+ * start to become numerous it will be necessary to reduce the
+ * granularity of these critical sections.
+ */
+
static int ram_save_setup(QEMUFile *f, void *opaque)
{
RAMBlock *block;
@@ -820,6 +833,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
/* iothread lock needed for ram_list.dirty_memory[] */
qemu_mutex_lock_iothread();
qemu_mutex_lock_ramlist();
+ rcu_read_lock();
bytes_transferred = 0;
reset_ram_globals();
@@ -832,7 +846,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
* gaps due to alignment or unplugs.
*/
migration_dirty_pages = 0;
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
uint64_t block_pages;
block_pages = block->used_length >> TARGET_PAGE_BITS;
@@ -841,17 +855,18 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
memory_global_dirty_log_start();
migration_bitmap_sync();
+ qemu_mutex_unlock_ramlist();
qemu_mutex_unlock_iothread();
qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
qemu_put_byte(f, strlen(block->idstr));
qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
qemu_put_be64(f, block->used_length);
}
- qemu_mutex_unlock_ramlist();
+ rcu_read_unlock();
ram_control_before_iterate(f, RAM_CONTROL_SETUP);
ram_control_after_iterate(f, RAM_CONTROL_SETUP);
@@ -868,12 +883,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
int64_t t0;
int total_sent = 0;
- qemu_mutex_lock_ramlist();
-
+ rcu_read_lock();
if (ram_list.version != last_version) {
reset_ram_globals();
}
+ /* Read version before ram_list.blocks */
+ smp_rmb();
+
ram_control_before_iterate(f, RAM_CONTROL_ROUND);
t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -904,8 +921,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
}
i++;
}
-
- qemu_mutex_unlock_ramlist();
+ rcu_read_unlock();
/*
* Must occur before EOS (or any QEMUFile operation)
@@ -933,7 +949,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
/* Called with iothread lock */
static int ram_save_complete(QEMUFile *f, void *opaque)
{
- qemu_mutex_lock_ramlist();
+ rcu_read_lock();
+
migration_bitmap_sync();
ram_control_before_iterate(f, RAM_CONTROL_FINISH);
@@ -955,7 +972,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
ram_control_after_iterate(f, RAM_CONTROL_FINISH);
migration_end();
- qemu_mutex_unlock_ramlist();
+ rcu_read_unlock();
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
return 0;
@@ -969,7 +986,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
if (remaining_size < max_size) {
qemu_mutex_lock_iothread();
+ rcu_read_lock();
migration_bitmap_sync();
+ rcu_read_unlock();
qemu_mutex_unlock_iothread();
remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
}
@@ -1011,6 +1030,9 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
return 0;
}
+/* Must be called from within a rcu critical section.
+ * Returns a pointer from within the RCU-protected ram_list.
+ */
static inline void *host_from_stream_offset(QEMUFile *f,
ram_addr_t offset,
int flags)
@@ -1032,7 +1054,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
qemu_get_buffer(f, (uint8_t *)id, len);
id[len] = 0;
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (!strncmp(id, block->idstr, sizeof(id)) &&
block->max_length > offset) {
return memory_region_get_ram_ptr(block->mr) + offset;
@@ -1065,6 +1087,12 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
ret = -EINVAL;
}
+ /* This RCU critical section can be very long running.
+ * When RCU reclaims in the code start to become numerous,
+ * it will be necessary to reduce the granularity of this
+ * critical section.
+ */
+ rcu_read_lock();
while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
ram_addr_t addr, total_ram_bytes;
void *host;
@@ -1089,7 +1117,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
id[len] = 0;
length = qemu_get_be64(f);
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (!strncmp(id, block->idstr, sizeof(id))) {
if (length != block->used_length) {
Error *local_err = NULL;
@@ -1163,6 +1191,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
}
}
+ rcu_read_unlock();
DPRINTF("Completed load of VM with exit code %d seq iteration "
"%" PRIu64 "\n", ret, seq_iter);
return ret;
diff --git a/exec.c b/exec.c
index a7c2b92d0a..c85321a38b 100644
--- a/exec.c
+++ b/exec.c
@@ -44,7 +44,7 @@
#include "trace.h"
#endif
#include "exec/cpu-all.h"
-
+#include "qemu/rcu_queue.h"
#include "exec/cputlb.h"
#include "translate-all.h"
@@ -58,6 +58,9 @@
#if !defined(CONFIG_USER_ONLY)
static bool in_migration;
+/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
+ * are protected by the ramlist lock.
+ */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
static MemoryRegion *system_memory;
@@ -806,16 +809,16 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...)
}
#if !defined(CONFIG_USER_ONLY)
+/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
RAMBlock *block;
- /* The list is protected by the iothread lock here. */
block = atomic_rcu_read(&ram_list.mru_block);
if (block && addr - block->offset < block->max_length) {
goto found;
}
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (addr - block->offset < block->max_length) {
goto found;
}
@@ -854,10 +857,12 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
end = TARGET_PAGE_ALIGN(start + length);
start &= TARGET_PAGE_MASK;
+ rcu_read_lock();
block = qemu_get_ram_block(start);
assert(block == qemu_get_ram_block(end - 1));
start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
cpu_tlb_reset_dirty_all(start1, length);
+ rcu_read_unlock();
}
/* Note: start and end must be within the same ram block. */
@@ -1190,6 +1195,7 @@ error:
}
#endif
+/* Called with the ramlist lock held. */
static ram_addr_t find_ram_offset(ram_addr_t size)
{
RAMBlock *block, *next_block;
@@ -1197,16 +1203,16 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
assert(size != 0); /* it would hand out same offset multiple times */
- if (QLIST_EMPTY(&ram_list.blocks)) {
+ if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
return 0;
}
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
ram_addr_t end, next = RAM_ADDR_MAX;
end = block->offset + block->max_length;
- QLIST_FOREACH(next_block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
if (next_block->offset >= end) {
next = MIN(next, next_block->offset);
}
@@ -1231,9 +1237,11 @@ ram_addr_t last_ram_offset(void)
RAMBlock *block;
ram_addr_t last = 0;
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ rcu_read_lock();
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
last = MAX(last, block->offset + block->max_length);
}
+ rcu_read_unlock();
return last;
}
@@ -1253,11 +1261,14 @@ static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
}
}
+/* Called within an RCU critical section, or while the ramlist lock
+ * is held.
+ */
static RAMBlock *find_ram_block(ram_addr_t addr)
{
RAMBlock *block;
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (block->offset == addr) {
return block;
}
@@ -1271,6 +1282,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
RAMBlock *new_block, *block;
+ rcu_read_lock();
new_block = find_ram_block(addr);
assert(new_block);
assert(!new_block->idstr[0]);
@@ -1284,15 +1296,14 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
}
pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
- qemu_mutex_lock_ramlist();
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
new_block->idstr);
abort();
}
}
- qemu_mutex_unlock_ramlist();
+ rcu_read_unlock();
}
/* Called with iothread lock held. */
@@ -1305,10 +1316,12 @@ void qemu_ram_unset_idstr(ram_addr_t addr)
* does not work anyway.
*/
+ rcu_read_lock();
block = find_ram_block(addr);
if (block) {
memset(block->idstr, 0, sizeof(block->idstr));
}
+ rcu_read_unlock();
}
static int memory_try_enable_merging(void *addr, size_t len)
@@ -1372,7 +1385,6 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
- /* This assumes the iothread lock is taken here too. */
qemu_mutex_lock_ramlist();
new_block->offset = find_ram_offset(new_block->max_length);
@@ -1398,21 +1410,23 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
* QLIST (which has an RCU-friendly variant) does not have insertion at
* tail, so save the last element in last_block.
*/
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
last_block = block;
if (block->max_length < new_block->max_length) {
break;
}
}
if (block) {
- QLIST_INSERT_BEFORE(block, new_block, next);
+ QLIST_INSERT_BEFORE_RCU(block, new_block, next);
} else if (last_block) {
- QLIST_INSERT_AFTER(last_block, new_block, next);
+ QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
} else { /* list is empty */
- QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+ QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
}
ram_list.mru_block = NULL;
+ /* Write list before version */
+ smp_wmb();
ram_list.version++;
qemu_mutex_unlock_ramlist();
@@ -1552,12 +1566,13 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
{
RAMBlock *block;
- /* This assumes the iothread lock is taken here too. */
qemu_mutex_lock_ramlist();
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (addr == block->offset) {
- QLIST_REMOVE(block, next);
+ QLIST_REMOVE_RCU(block, next);
ram_list.mru_block = NULL;
+ /* Write list before version */
+ smp_wmb();
ram_list.version++;
g_free_rcu(block, rcu);
break;
@@ -1583,17 +1598,17 @@ static void reclaim_ramblock(RAMBlock *block)
g_free(block);
}
-/* Called with the iothread lock held */
void qemu_ram_free(ram_addr_t addr)
{
RAMBlock *block;
- /* This assumes the iothread lock is taken here too. */
qemu_mutex_lock_ramlist();
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (addr == block->offset) {
- QLIST_REMOVE(block, next);
+ QLIST_REMOVE_RCU(block, next);
ram_list.mru_block = NULL;
+ /* Write list before version */
+ smp_wmb();
ram_list.version++;
call_rcu(block, reclaim_ramblock, rcu);
break;
@@ -1610,7 +1625,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
int flags;
void *area, *vaddr;
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
offset = addr - block->offset;
if (offset < block->max_length) {
vaddr = ramblock_ptr(block, offset);
@@ -1657,8 +1672,10 @@ int qemu_get_ram_fd(ram_addr_t addr)
RAMBlock *block;
int fd;
+ rcu_read_lock();
block = qemu_get_ram_block(addr);
fd = block->fd;
+ rcu_read_unlock();
return fd;
}
@@ -1667,8 +1684,10 @@ void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
RAMBlock *block;
void *ptr;
+ rcu_read_lock();
block = qemu_get_ram_block(addr);
ptr = ramblock_ptr(block, 0);
+ rcu_read_unlock();
return ptr;
}
@@ -1676,12 +1695,19 @@ void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
* This should not be used for general purpose DMA. Use address_space_map
* or address_space_rw instead. For local memory (e.g. video ram) that the
* device owns, use memory_region_get_ram_ptr.
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore. If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the region that includes the incoming
+ * ram_addr_t.
*/
void *qemu_get_ram_ptr(ram_addr_t addr)
{
RAMBlock *block;
void *ptr;
+ rcu_read_lock();
block = qemu_get_ram_block(addr);
if (xen_enabled() && block->host == NULL) {
@@ -1691,19 +1717,26 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
*/
if (block->offset == 0) {
ptr = xen_map_cache(addr, 0, 0);
- goto done;
+ goto unlock;
}
block->host = xen_map_cache(block->offset, block->max_length, 1);
}
ptr = ramblock_ptr(block, addr - block->offset);
-done:
+unlock:
+ rcu_read_unlock();
return ptr;
}
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
* but takes a size argument.
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore. If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the region that includes the incoming
+ * ram_addr_t.
*/
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
{
@@ -1715,11 +1748,13 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
return xen_map_cache(addr, *size, 1);
} else {
RAMBlock *block;
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ rcu_read_lock();
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
if (addr - block->offset < block->max_length) {
if (addr - block->offset + *size > block->max_length)
*size = block->max_length - addr + block->offset;
ptr = ramblock_ptr(block, addr - block->offset);
+ rcu_read_unlock();
return ptr;
}
}
@@ -1745,17 +1780,20 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
MemoryRegion *mr;
if (xen_enabled()) {
+ rcu_read_lock();
*ram_addr = xen_ram_addr_from_mapcache(ptr);
mr = qemu_get_ram_block(*ram_addr)->mr;
+ rcu_read_unlock();
return mr;
}
- block = ram_list.mru_block;
+ rcu_read_lock();
+ block = atomic_rcu_read(&ram_list.mru_block);
if (block && block->host && host - block->host < block->max_length) {
goto found;
}
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
/* This case append when the block is not mapped. */
if (block->host == NULL) {
continue;
@@ -1765,11 +1803,13 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
}
}
+ rcu_read_unlock();
return NULL;
found:
*ram_addr = block->offset + (host - block->host);
mr = block->mr;
+ rcu_read_unlock();
return mr;
}
@@ -3027,8 +3067,10 @@ void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
RAMBlock *block;
- QLIST_FOREACH(block, &ram_list.blocks, next) {
+ rcu_read_lock();
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
func(block->host, block->offset, block->used_length, opaque);
}
+ rcu_read_unlock();
}
#endif
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 87b865800c..ac06c6721c 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -279,9 +279,7 @@ struct RAMBlock {
uint32_t flags;
/* Protected by iothread lock. */
char idstr[256];
- /* Reads can take either the iothread or the ramlist lock.
- * Writes must take both locks.
- */
+ /* RCU-enabled, writes protected by the ramlist lock */
QLIST_ENTRY(RAMBlock) next;
int fd;
};
@@ -298,7 +296,7 @@ typedef struct RAMList {
/* Protected by the iothread lock. */
unsigned long *dirty_memory[DIRTY_MEMORY_NUM];
RAMBlock *mru_block;
- /* Protected by the ramlist lock. */
+ /* RCU-enabled, writes protected by the ramlist lock. */
QLIST_HEAD(, RAMBlock) blocks;
uint32_t version;
} RAMList;