diff options
36 files changed, 997 insertions, 191 deletions
diff --git a/arch_init.c b/arch_init.c index 89c8fa46bb..5fc6fc382c 100644 --- a/arch_init.c +++ b/arch_init.c @@ -52,6 +52,7 @@ #include "exec/ram_addr.h" #include "hw/acpi/acpi.h" #include "qemu/host-utils.h" +#include "qemu/rcu_queue.h" #ifdef DEBUG_ARCH_INIT #define DPRINTF(fmt, ...) \ @@ -487,7 +488,6 @@ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) } -/* Needs iothread lock! */ /* Fix me: there are too many global variables used in migration process. */ static int64_t start_time; static int64_t bytes_xfer_prev; @@ -500,6 +500,7 @@ static void migration_bitmap_sync_init(void) num_dirty_pages_period = 0; } +/* Called with iothread lock held, to protect ram_list.dirty_memory[] */ static void migration_bitmap_sync(void) { RAMBlock *block; @@ -523,9 +524,12 @@ static void migration_bitmap_sync(void) trace_migration_bitmap_sync_start(); address_space_sync_dirty_bitmap(&address_space_memory); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { migration_bitmap_sync_range(block->mr->ram_addr, block->used_length); } + rcu_read_unlock(); + trace_migration_bitmap_sync_end(migration_dirty_pages - num_dirty_pages_init); num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; @@ -648,6 +652,8 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, /* * ram_find_and_save_block: Finds a page to send and sends it to f * + * Called within an RCU critical section. + * * Returns: The number of bytes written. 
* 0 means no dirty pages */ @@ -661,7 +667,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage) MemoryRegion *mr; if (!block) - block = QTAILQ_FIRST(&ram_list.blocks); + block = QLIST_FIRST_RCU(&ram_list.blocks); while (true) { mr = block->mr; @@ -672,9 +678,9 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage) } if (offset >= block->used_length) { offset = 0; - block = QTAILQ_NEXT(block, next); + block = QLIST_NEXT_RCU(block, next); if (!block) { - block = QTAILQ_FIRST(&ram_list.blocks); + block = QLIST_FIRST_RCU(&ram_list.blocks); complete_round = true; ram_bulk_stage = false; } @@ -688,9 +694,9 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage) } } } + last_seen_block = block; last_offset = offset; - return bytes_sent; } @@ -728,9 +734,10 @@ uint64_t ram_bytes_total(void) RAMBlock *block; uint64_t total = 0; - QTAILQ_FOREACH(block, &ram_list.blocks, next) + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) total += block->used_length; - + rcu_read_unlock(); return total; } @@ -776,6 +783,13 @@ static void reset_ram_globals(void) #define MAX_WAIT 50 /* ms, half buffered_file limit */ + +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has + * long-running RCU critical section. When rcu-reclaims in the code + * start to become numerous it will be necessary to reduce the + * granularity of these critical sections. + */ + static int ram_save_setup(QEMUFile *f, void *opaque) { RAMBlock *block; @@ -816,8 +830,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque) acct_clear(); } + /* iothread lock needed for ram_list.dirty_memory[] */ qemu_mutex_lock_iothread(); qemu_mutex_lock_ramlist(); + rcu_read_lock(); bytes_transferred = 0; reset_ram_globals(); @@ -830,7 +846,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) * gaps due to alignment or unplugs. 
*/ migration_dirty_pages = 0; - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { uint64_t block_pages; block_pages = block->used_length >> TARGET_PAGE_BITS; @@ -839,17 +855,18 @@ static int ram_save_setup(QEMUFile *f, void *opaque) memory_global_dirty_log_start(); migration_bitmap_sync(); + qemu_mutex_unlock_ramlist(); qemu_mutex_unlock_iothread(); qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); qemu_put_be64(f, block->used_length); } - qemu_mutex_unlock_ramlist(); + rcu_read_unlock(); ram_control_before_iterate(f, RAM_CONTROL_SETUP); ram_control_after_iterate(f, RAM_CONTROL_SETUP); @@ -866,12 +883,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) int64_t t0; int total_sent = 0; - qemu_mutex_lock_ramlist(); - + rcu_read_lock(); if (ram_list.version != last_version) { reset_ram_globals(); } + /* Read version before ram_list.blocks */ + smp_rmb(); + ram_control_before_iterate(f, RAM_CONTROL_ROUND); t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); @@ -902,8 +921,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) } i++; } - - qemu_mutex_unlock_ramlist(); + rcu_read_unlock(); /* * Must occur before EOS (or any QEMUFile operation) @@ -928,9 +946,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) return total_sent; } +/* Called with iothread lock */ static int ram_save_complete(QEMUFile *f, void *opaque) { - qemu_mutex_lock_ramlist(); + rcu_read_lock(); + migration_bitmap_sync(); ram_control_before_iterate(f, RAM_CONTROL_FINISH); @@ -952,7 +972,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) ram_control_after_iterate(f, RAM_CONTROL_FINISH); migration_end(); - qemu_mutex_unlock_ramlist(); + rcu_read_unlock(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); return 0; @@ 
-966,7 +986,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) if (remaining_size < max_size) { qemu_mutex_lock_iothread(); + rcu_read_lock(); migration_bitmap_sync(); + rcu_read_unlock(); qemu_mutex_unlock_iothread(); remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; } @@ -1008,6 +1030,9 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) return 0; } +/* Must be called from within a rcu critical section. + * Returns a pointer from within the RCU-protected ram_list. + */ static inline void *host_from_stream_offset(QEMUFile *f, ram_addr_t offset, int flags) @@ -1029,7 +1054,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, qemu_get_buffer(f, (uint8_t *)id, len); id[len] = 0; - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id)) && block->max_length > offset) { return memory_region_get_ram_ptr(block->mr) + offset; @@ -1062,6 +1087,12 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; } + /* This RCU critical section can be very long running. + * When RCU reclaims in the code start to become numerous, + * it will be necessary to reduce the granularity of this + * critical section. 
+ */ + rcu_read_lock(); while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr, total_ram_bytes; void *host; @@ -1086,7 +1117,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) id[len] = 0; length = qemu_get_be64(f); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id))) { if (length != block->used_length) { Error *local_err = NULL; @@ -1117,7 +1148,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; break; } - ch = qemu_get_byte(f); ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); break; @@ -1128,7 +1158,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; break; } - qemu_get_buffer(f, host, TARGET_PAGE_SIZE); break; case RAM_SAVE_FLAG_XBZRLE: @@ -1138,7 +1167,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; break; } - if (load_xbzrle(f, addr, host) < 0) { error_report("Failed to decompress XBZRLE page at " RAM_ADDR_FMT, addr); @@ -1163,6 +1191,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } } + rcu_read_unlock(); DPRINTF("Completed load of VM with exit code %d seq iteration " "%" PRIu64 "\n", ret, seq_iter); return ret; diff --git a/bootdevice.c b/bootdevice.c index 5914417027..c3a010c094 100644 --- a/bootdevice.c +++ b/bootdevice.c @@ -210,7 +210,9 @@ char *get_boot_devices_list(size_t *size, bool ignore_suffixes) char *list = NULL; QTAILQ_FOREACH(i, &fw_boot_order, link) { - char *devpath = NULL, *bootpath; + char *devpath = NULL, *suffix = NULL; + char *bootpath; + char *d; size_t len; if (i->dev) { @@ -218,21 +220,22 @@ char *get_boot_devices_list(size_t *size, bool ignore_suffixes) assert(devpath); } - if (i->suffix && !ignore_suffixes && devpath) { - size_t bootpathlen = strlen(devpath) + strlen(i->suffix) + 1; - - bootpath = g_malloc(bootpathlen); - snprintf(bootpath, bootpathlen, "%s%s", devpath, i->suffix); - 
g_free(devpath); - } else if (devpath) { - bootpath = devpath; - } else if (!ignore_suffixes) { - assert(i->suffix); - bootpath = g_strdup(i->suffix); - } else { - bootpath = g_strdup(""); + if (!ignore_suffixes) { + d = qdev_get_own_fw_dev_path_from_handler(i->dev->parent_bus, i->dev); + if (d) { + assert(!i->suffix); + suffix = d; + } else { + suffix = g_strdup(i->suffix); + } } + bootpath = g_strdup_printf("%s%s", + devpath ? devpath : "", + suffix ? suffix : ""); + g_free(devpath); + g_free(suffix); + if (total) { list[total-1] = '\n'; } diff --git a/cpu-exec.c b/cpu-exec.c index 67381176da..2ffeb6e40d 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -24,6 +24,9 @@ #include "qemu/atomic.h" #include "sysemu/qtest.h" #include "qemu/timer.h" +#include "exec/address-spaces.h" +#include "exec/memory-internal.h" +#include "qemu/rcu.h" /* -icount align implementation. */ @@ -141,6 +144,33 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc) cpu->exception_index = -1; siglongjmp(cpu->jmp_env, 1); } + +void cpu_reload_memory_map(CPUState *cpu) +{ + AddressSpaceDispatch *d; + + if (qemu_in_vcpu_thread()) { + /* Do not let the guest prolong the critical section as much as it + * desires. + * + * Currently, this is prevented by the I/O thread's periodic kicking + * of the VCPU thread (iothread_requesting_mutex, qemu_cpu_kick_thread) + * but this will go away once TCG's execution moves out of the global + * mutex. + * + * This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which + * only protects cpu->as->dispatch. Since we reload it below, we can + * split the critical section. + */ + rcu_read_unlock(); + rcu_read_lock(); + } + + /* The CPU and TLB are protected by the iothread lock.
*/ + d = atomic_rcu_read(&cpu->as->dispatch); + cpu->memory_dispatch = d; + tlb_flush(cpu, 1); +} #endif /* Execute a TB, and fix up the CPU state afterwards if necessary */ @@ -352,6 +382,8 @@ int cpu_exec(CPUArchState *env) * an instruction scheduling constraint on modern architectures. */ smp_mb(); + rcu_read_lock(); + if (unlikely(exit_request)) { cpu->exit_request = 1; } @@ -548,6 +580,7 @@ int cpu_exec(CPUArchState *env) } /* for(;;) */ cc->cpu_exec_exit(cpu); + rcu_read_unlock(); /* fail safe : never use current_cpu outside cpu_exec() */ current_cpu = NULL; @@ -1108,7 +1108,7 @@ bool qemu_cpu_is_self(CPUState *cpu) return qemu_thread_is_self(cpu->thread); } -static bool qemu_in_vcpu_thread(void) +bool qemu_in_vcpu_thread(void) { return current_cpu && qemu_cpu_is_self(current_cpu); } @@ -243,8 +243,12 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr, } /* Add a new TLB entry. At most one entry for a given virtual address - is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the - supplied size is only used by tlb_flush_page. */ + * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the + * supplied size is only used by tlb_flush_page. + * + * Called from TCG-generated code, which is under an RCU read-side + * critical section. 
+ */ void tlb_set_page(CPUState *cpu, target_ulong vaddr, hwaddr paddr, int prot, int mmu_idx, target_ulong size) @@ -265,8 +269,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, } sz = size; - section = address_space_translate_for_iotlb(cpu->as, paddr, - &xlat, &sz); + section = address_space_translate_for_iotlb(cpu, paddr, &xlat, &sz); assert(sz >= TARGET_PAGE_SIZE); #if defined(DEBUG_TLB) @@ -347,7 +350,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) cpu_ldub_code(env1, addr); } pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK; - mr = iotlb_to_region(cpu->as, pd); + mr = iotlb_to_region(cpu, pd); if (memory_region_is_unassigned(mr)) { CPUClass *cc = CPU_GET_CLASS(cpu); diff --git a/docs/memory.txt b/docs/memory.txt index b12f1f049a..2ceb348942 100644 --- a/docs/memory.txt +++ b/docs/memory.txt @@ -73,17 +73,66 @@ stability. Region lifecycle ---------------- -A region is created by one of the constructor functions (memory_region_init*()) -and attached to an object. It is then destroyed by object_unparent() or simply -when the parent object dies. +A region is created by one of the memory_region_init*() functions and +attached to an object, which acts as its owner or parent. QEMU ensures +that the owner object remains alive as long as the region is visible to +the guest, or as long as the region is in use by a virtual CPU or another +device. For example, the owner object will not die between an +address_space_map operation and the corresponding address_space_unmap. -In between, a region can be added to an address space -by using memory_region_add_subregion() and removed using -memory_region_del_subregion(). Destroying the region implicitly -removes the region from the address space. +After creation, a region can be added to an address space or a +container with memory_region_add_subregion(), and removed using +memory_region_del_subregion(). 
+ +Various region attributes (read-only, dirty logging, coalesced mmio, +ioeventfd) can be changed during the region lifecycle. They take effect +as soon as the region is made visible. This can be immediately, later, +or never. + +Destruction of a memory region happens automatically when the owner +object dies. + +If however the memory region is part of a dynamically allocated data +structure, you should call object_unparent() to destroy the memory region +before the data structure is freed. For an example see VFIOMSIXInfo +and VFIOQuirk in hw/vfio/pci.c. + +You must not destroy a memory region as long as it may be in use by a +device or CPU. To ensure this, as a general rule do not create or +destroy memory regions dynamically during a device's lifetime, and only +call object_unparent() in the memory region owner's instance_finalize +callback. The dynamically allocated data structure that contains the +memory region then should obviously be freed in the instance_finalize +callback as well. + +If you break this rule, the following situation can happen: + +- the memory region's owner had a reference taken via memory_region_ref + (for example by address_space_map) + +- the region is unparented, and has no owner anymore + +- when address_space_unmap is called, the reference to the memory region's + owner is leaked. + + +There is an exception to the above rule: it is okay to call +object_unparent at any time for an alias or a container region. It is +therefore also okay to create or destroy alias and container regions +dynamically during a device's lifetime. + +This exceptional usage is valid because aliases and containers only help +QEMU build the guest's memory map; they are never accessed directly. +memory_region_ref and memory_region_unref are never called on aliases +or containers, and the above situation then cannot happen. Exploiting +this exception is rarely necessary, and therefore it is discouraged, +but nevertheless it is used in a few places.
+ +For regions that "have no owner" (NULL is passed at creation time), the +machine object is actually used as the owner. Since instance_finalize is +never called for the machine object, you must never call object_unparent +on regions that have no owner, unless they are aliases or containers. -Region attributes may be changed at any point; they take effect once -the region becomes exposed to the guest. Overlapping regions and priority -------------------------------- @@ -215,13 +264,6 @@ BAR containing MMIO registers is mapped after it. Note that if the guest maps a BAR outside the PCI hole, it would not be visible as the pci-hole alias clips it to a 0.5GB range. -Attributes ----------- - -Various region attributes (read-only, dirty logging, coalesced mmio, ioeventfd) -can be changed during the region lifecycle. They take effect once the region -is made visible (which can be immediately, later, or never). - MMIO Operations --------------- diff --git a/docs/rcu.txt b/docs/rcu.txt index 61752b93ab..21ecb8106c 100644 --- a/docs/rcu.txt +++ b/docs/rcu.txt @@ -120,12 +120,15 @@ The core RCU API is small: void call_rcu(T *p, void (*func)(T *p), field-name); + void g_free_rcu(T *p, + field-name); - call_rcu1 is typically used through this macro, in the common case - where the "struct rcu_head" is the first field in the struct. In - the above case, one could have written simply: + call_rcu1 is typically used through these macros, in the common case + where the "struct rcu_head" is the first field in the struct. If + the callback function is g_free, in particular, g_free_rcu can be + used.
In the above case, one could have written simply: - call_rcu(foo_reclaim, g_free, rcu); + g_free_rcu(foo_reclaim, rcu); typeof(*p) atomic_rcu_read(p); @@ -44,7 +44,7 @@ #include "trace.h" #endif #include "exec/cpu-all.h" - +#include "qemu/rcu_queue.h" #include "exec/cputlb.h" #include "translate-all.h" @@ -58,7 +58,10 @@ #if !defined(CONFIG_USER_ONLY) static bool in_migration; -RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) }; +/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes + * are protected by the ramlist lock. + */ +RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) }; static MemoryRegion *system_memory; static MemoryRegion *system_io; @@ -115,6 +118,8 @@ struct PhysPageEntry { typedef PhysPageEntry Node[P_L2_SIZE]; typedef struct PhysPageMap { + struct rcu_head rcu; + unsigned sections_nb; unsigned sections_nb_alloc; unsigned nodes_nb; @@ -124,6 +129,8 @@ typedef struct PhysPageMap { } PhysPageMap; struct AddressSpaceDispatch { + struct rcu_head rcu; + /* This is a multi-level map on the physical address space. * The bottom level has pointers to MemoryRegionSections. 
*/ @@ -315,6 +322,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr) && mr != &io_mem_watch; } +/* Called from RCU critical section */ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, hwaddr addr, bool resolve_subpage) @@ -330,6 +338,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, return section; } +/* Called from RCU critical section */ static MemoryRegionSection * address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat, hwaddr *plen, bool resolve_subpage) @@ -370,8 +379,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, MemoryRegion *mr; hwaddr len = *plen; + rcu_read_lock(); for (;;) { - section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true); + AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch); + section = address_space_translate_internal(d, addr, &addr, plen, true); mr = section->mr; if (!mr->iommu_ops) { @@ -397,15 +408,18 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, *plen = len; *xlat = addr; + rcu_read_unlock(); return mr; } +/* Called from RCU critical section */ MemoryRegionSection * -address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat, - hwaddr *plen) +address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr, + hwaddr *xlat, hwaddr *plen) { MemoryRegionSection *section; - section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false); + section = address_space_translate_internal(cpu->memory_dispatch, + addr, xlat, plen, false); assert(!section->mr->iommu_ops); return section; @@ -795,16 +809,16 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...) } #if !defined(CONFIG_USER_ONLY) +/* Called from RCU critical section */ static RAMBlock *qemu_get_ram_block(ram_addr_t addr) { RAMBlock *block; - /* The list is protected by the iothread lock here. 
*/ - block = ram_list.mru_block; + block = atomic_rcu_read(&ram_list.mru_block); if (block && addr - block->offset < block->max_length) { goto found; } - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (addr - block->offset < block->max_length) { goto found; } @@ -814,6 +828,22 @@ static RAMBlock *qemu_get_ram_block(ram_addr_t addr) abort(); found: + /* It is safe to write mru_block outside the iothread lock. This + * is what happens: + * + * mru_block = xxx + * rcu_read_unlock() + * xxx removed from list + * rcu_read_lock() + * read mru_block + * mru_block = NULL; + * call_rcu(reclaim_ramblock, xxx); + * rcu_read_unlock() + * + * atomic_rcu_set is not needed here. The block was already published + * when it was placed into the list. Here we're just making an extra + * copy of the pointer. + */ ram_list.mru_block = block; return block; } @@ -827,10 +857,12 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length) end = TARGET_PAGE_ALIGN(start + length); start &= TARGET_PAGE_MASK; + rcu_read_lock(); block = qemu_get_ram_block(start); assert(block == qemu_get_ram_block(end - 1)); start1 = (uintptr_t)ramblock_ptr(block, start - block->offset); cpu_tlb_reset_dirty_all(start1, length); + rcu_read_unlock(); } /* Note: start and end must be within the same ram block. */ @@ -851,6 +883,7 @@ static void cpu_physical_memory_set_dirty_tracking(bool enable) in_migration = enable; } +/* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(CPUState *cpu, MemoryRegionSection *section, target_ulong vaddr, @@ -1162,6 +1195,7 @@ error: } #endif +/* Called with the ramlist lock held. 
*/ static ram_addr_t find_ram_offset(ram_addr_t size) { RAMBlock *block, *next_block; @@ -1169,15 +1203,16 @@ static ram_addr_t find_ram_offset(ram_addr_t size) assert(size != 0); /* it would hand out same offset multiple times */ - if (QTAILQ_EMPTY(&ram_list.blocks)) + if (QLIST_EMPTY_RCU(&ram_list.blocks)) { return 0; + } - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { ram_addr_t end, next = RAM_ADDR_MAX; end = block->offset + block->max_length; - QTAILQ_FOREACH(next_block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) { if (next_block->offset >= end) { next = MIN(next, next_block->offset); } @@ -1202,9 +1237,11 @@ ram_addr_t last_ram_offset(void) RAMBlock *block; ram_addr_t last = 0; - QTAILQ_FOREACH(block, &ram_list.blocks, next) + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { last = MAX(last, block->offset + block->max_length); - + } + rcu_read_unlock(); return last; } @@ -1224,11 +1261,14 @@ static void qemu_ram_setup_dump(void *addr, ram_addr_t size) } } +/* Called within an RCU critical section, or while the ramlist lock + * is held. + */ static RAMBlock *find_ram_block(ram_addr_t addr) { RAMBlock *block; - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (block->offset == addr) { return block; } @@ -1237,11 +1277,13 @@ static RAMBlock *find_ram_block(ram_addr_t addr) return NULL; } +/* Called with iothread lock held. 
*/ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev) { - RAMBlock *new_block = find_ram_block(addr); - RAMBlock *block; + RAMBlock *new_block, *block; + rcu_read_lock(); + new_block = find_ram_block(addr); assert(new_block); assert(!new_block->idstr[0]); @@ -1254,25 +1296,32 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev) } pstrcat(new_block->idstr, sizeof(new_block->idstr), name); - /* This assumes the iothread lock is taken here too. */ - qemu_mutex_lock_ramlist(); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (block != new_block && !strcmp(block->idstr, new_block->idstr)) { fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n", new_block->idstr); abort(); } } - qemu_mutex_unlock_ramlist(); + rcu_read_unlock(); } +/* Called with iothread lock held. */ void qemu_ram_unset_idstr(ram_addr_t addr) { - RAMBlock *block = find_ram_block(addr); + RAMBlock *block; + + /* FIXME: arch_init.c assumes that this is not called throughout + * migration. Ignore the problem since hot-unplug during migration + * does not work anyway. + */ + rcu_read_lock(); + block = find_ram_block(addr); if (block) { memset(block->idstr, 0, sizeof(block->idstr)); } + rcu_read_unlock(); } static int memory_try_enable_merging(void *addr, size_t len) @@ -1331,11 +1380,11 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp) static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) { RAMBlock *block; + RAMBlock *last_block = NULL; ram_addr_t old_ram_size, new_ram_size; old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS; - /* This assumes the iothread lock is taken here too. */ qemu_mutex_lock_ramlist(); new_block->offset = find_ram_offset(new_block->max_length); @@ -1357,19 +1406,27 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) } } - /* Keep the list sorted from biggest to smallest block. 
*/ - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ, + * QLIST (which has an RCU-friendly variant) does not have insertion at + * tail, so save the last element in last_block. + */ + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + last_block = block; if (block->max_length < new_block->max_length) { break; } } if (block) { - QTAILQ_INSERT_BEFORE(block, new_block, next); - } else { - QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next); + QLIST_INSERT_BEFORE_RCU(block, new_block, next); + } else if (last_block) { + QLIST_INSERT_AFTER_RCU(last_block, new_block, next); + } else { /* list is empty */ + QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next); } ram_list.mru_block = NULL; + /* Write list before version */ + smp_wmb(); ram_list.version++; qemu_mutex_unlock_ramlist(); @@ -1377,6 +1434,8 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) if (new_ram_size > old_ram_size) { int i; + + /* ram_list.dirty_memory[] is protected by the iothread lock. */ for (i = 0; i < DIRTY_MEMORY_NUM; i++) { ram_list.dirty_memory[i] = bitmap_zero_extend(ram_list.dirty_memory[i], @@ -1507,49 +1566,55 @@ void qemu_ram_free_from_ptr(ram_addr_t addr) { RAMBlock *block; - /* This assumes the iothread lock is taken here too. 
*/ qemu_mutex_lock_ramlist(); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (addr == block->offset) { - QTAILQ_REMOVE(&ram_list.blocks, block, next); + QLIST_REMOVE_RCU(block, next); ram_list.mru_block = NULL; + /* Write list before version */ + smp_wmb(); ram_list.version++; - g_free(block); + g_free_rcu(block, rcu); break; } } qemu_mutex_unlock_ramlist(); } +static void reclaim_ramblock(RAMBlock *block) +{ + if (block->flags & RAM_PREALLOC) { + ; + } else if (xen_enabled()) { + xen_invalidate_map_cache_entry(block->host); +#ifndef _WIN32 + } else if (block->fd >= 0) { + munmap(block->host, block->max_length); + close(block->fd); +#endif + } else { + qemu_anon_ram_free(block->host, block->max_length); + } + g_free(block); +} + void qemu_ram_free(ram_addr_t addr) { RAMBlock *block; - /* This assumes the iothread lock is taken here too. */ qemu_mutex_lock_ramlist(); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (addr == block->offset) { - QTAILQ_REMOVE(&ram_list.blocks, block, next); + QLIST_REMOVE_RCU(block, next); ram_list.mru_block = NULL; + /* Write list before version */ + smp_wmb(); ram_list.version++; - if (block->flags & RAM_PREALLOC) { - ; - } else if (xen_enabled()) { - xen_invalidate_map_cache_entry(block->host); -#ifndef _WIN32 - } else if (block->fd >= 0) { - munmap(block->host, block->max_length); - close(block->fd); -#endif - } else { - qemu_anon_ram_free(block->host, block->max_length); - } - g_free(block); + call_rcu(block, reclaim_ramblock, rcu); break; } } qemu_mutex_unlock_ramlist(); - } #ifndef _WIN32 @@ -1560,7 +1625,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) int flags; void *area, *vaddr; - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { offset = addr - block->offset; if (offset < block->max_length) { vaddr = ramblock_ptr(block, offset); @@ -1597,7 +1662,6 
@@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) memory_try_enable_merging(vaddr, length); qemu_ram_setup_dump(vaddr, length); } - return; } } } @@ -1605,49 +1669,78 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) int qemu_get_ram_fd(ram_addr_t addr) { - RAMBlock *block = qemu_get_ram_block(addr); + RAMBlock *block; + int fd; - return block->fd; + rcu_read_lock(); + block = qemu_get_ram_block(addr); + fd = block->fd; + rcu_read_unlock(); + return fd; } void *qemu_get_ram_block_host_ptr(ram_addr_t addr) { - RAMBlock *block = qemu_get_ram_block(addr); + RAMBlock *block; + void *ptr; - return ramblock_ptr(block, 0); + rcu_read_lock(); + block = qemu_get_ram_block(addr); + ptr = ramblock_ptr(block, 0); + rcu_read_unlock(); + return ptr; } /* Return a host pointer to ram allocated with qemu_ram_alloc. - With the exception of the softmmu code in this file, this should - only be used for local memory (e.g. video ram) that the device owns, - and knows it isn't going to access beyond the end of the block. - - It should not be used for general purpose DMA. - Use cpu_physical_memory_map/cpu_physical_memory_rw instead. + * This should not be used for general purpose DMA. Use address_space_map + * or address_space_rw instead. For local memory (e.g. video ram) that the + * device owns, use memory_region_get_ram_ptr. + * + * By the time this function returns, the returned pointer is not protected + * by RCU anymore. If the caller is not within an RCU critical section and + * does not hold the iothread lock, it must have other means of protecting the + * pointer, such as a reference to the region that includes the incoming + * ram_addr_t. 
*/ void *qemu_get_ram_ptr(ram_addr_t addr) { - RAMBlock *block = qemu_get_ram_block(addr); + RAMBlock *block; + void *ptr; - if (xen_enabled()) { + rcu_read_lock(); + block = qemu_get_ram_block(addr); + + if (xen_enabled() && block->host == NULL) { /* We need to check if the requested address is in the RAM * because we don't want to map the entire memory in QEMU. * In that case just map until the end of the page. */ if (block->offset == 0) { - return xen_map_cache(addr, 0, 0); - } else if (block->host == NULL) { - block->host = - xen_map_cache(block->offset, block->max_length, 1); + ptr = xen_map_cache(addr, 0, 0); + goto unlock; } + + block->host = xen_map_cache(block->offset, block->max_length, 1); } - return ramblock_ptr(block, addr - block->offset); + ptr = ramblock_ptr(block, addr - block->offset); + +unlock: + rcu_read_unlock(); + return ptr; } /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr - * but takes a size argument */ + * but takes a size argument. + * + * By the time this function returns, the returned pointer is not protected + * by RCU anymore. If the caller is not within an RCU critical section and + * does not hold the iothread lock, it must have other means of protecting the + * pointer, such as a reference to the region that includes the incoming + * ram_addr_t. 
+ */ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size) { + void *ptr; if (*size == 0) { return NULL; } @@ -1655,12 +1748,14 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size) return xen_map_cache(addr, *size, 1); } else { RAMBlock *block; - - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (addr - block->offset < block->max_length) { if (addr - block->offset + *size > block->max_length) *size = block->max_length - addr + block->offset; - return ramblock_ptr(block, addr - block->offset); + ptr = ramblock_ptr(block, addr - block->offset); + rcu_read_unlock(); + return ptr; } } @@ -1670,23 +1765,35 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size) } /* Some of the softmmu routines need to translate from a host pointer - (typically a TLB entry) back to a ram offset. */ + * (typically a TLB entry) back to a ram offset. + * + * By the time this function returns, the returned pointer is not protected + * by RCU anymore. If the caller is not within an RCU critical section and + * does not hold the iothread lock, it must have other means of protecting the + * pointer, such as a reference to the region that includes the incoming + * ram_addr_t. + */ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) { RAMBlock *block; uint8_t *host = ptr; + MemoryRegion *mr; if (xen_enabled()) { + rcu_read_lock(); *ram_addr = xen_ram_addr_from_mapcache(ptr); - return qemu_get_ram_block(*ram_addr)->mr; + mr = qemu_get_ram_block(*ram_addr)->mr; + rcu_read_unlock(); + return mr; } - block = ram_list.mru_block; + rcu_read_lock(); + block = atomic_rcu_read(&ram_list.mru_block); if (block && block->host && host - block->host < block->max_length) { goto found; } - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { /* This case append when the block is not mapped. 
*/ if (block->host == NULL) { continue; @@ -1696,11 +1803,14 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) } } + rcu_read_unlock(); return NULL; found: *ram_addr = block->offset + (host - block->host); - return block->mr; + mr = block->mr; + rcu_read_unlock(); + return mr; } static void notdirty_mem_write(void *opaque, hwaddr ram_addr, @@ -1961,9 +2071,12 @@ static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as, return phys_section_add(map, §ion); } -MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index) +MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index) { - return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr; + AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch); + MemoryRegionSection *sections = d->map.sections; + + return sections[index & ~TARGET_PAGE_MASK].mr; } static void io_mem_init(void) @@ -1997,6 +2110,12 @@ static void mem_begin(MemoryListener *listener) as->next_dispatch = d; } +static void address_space_dispatch_free(AddressSpaceDispatch *d) +{ + phys_sections_free(&d->map); + g_free(d); +} + static void mem_commit(MemoryListener *listener) { AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener); @@ -2005,11 +2124,9 @@ static void mem_commit(MemoryListener *listener) phys_page_compact_all(next, next->map.nodes_nb); - as->dispatch = next; - + atomic_rcu_set(&as->dispatch, next); if (cur) { - phys_sections_free(&cur->map); - g_free(cur); + call_rcu(cur, address_space_dispatch_free, rcu); } } @@ -2026,7 +2143,7 @@ static void tcg_commit(MemoryListener *listener) if (cpu->tcg_as_listener != listener) { continue; } - tlb_flush(cpu, 1); + cpu_reload_memory_map(cpu); } } @@ -2068,8 +2185,10 @@ void address_space_destroy_dispatch(AddressSpace *as) { AddressSpaceDispatch *d = as->dispatch; - g_free(d); - as->dispatch = NULL; + atomic_rcu_set(&as->dispatch, NULL); + if (d) { + call_rcu(d, address_space_dispatch_free, rcu); + } } static void memory_map_init(void) 
@@ -2948,8 +3067,10 @@ void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) { RAMBlock *block; - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { func(block->host, block->offset, block->used_length, opaque); } + rcu_read_unlock(); } #endif diff --git a/hw/9pfs/virtio-9p-synth.c b/hw/9pfs/virtio-9p-synth.c index e75aa8772e..a0ab9a86a9 100644 --- a/hw/9pfs/virtio-9p-synth.c +++ b/hw/9pfs/virtio-9p-synth.c @@ -18,7 +18,7 @@ #include "fsdev/qemu-fsdev.h" #include "virtio-9p-synth.h" #include "qemu/rcu.h" - +#include "qemu/rcu_queue.h" #include <sys/stat.h> /* Root node for synth file system */ diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 2eacac0787..44c6b93727 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -818,6 +818,13 @@ static char *qdev_get_fw_dev_path_from_handler(BusState *bus, DeviceState *dev) return d; } +char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev) +{ + Object *obj = OBJECT(dev); + + return fw_path_provider_try_get_dev_path(obj, bus, dev); +} + static int qdev_get_fw_dev_path_helper(DeviceState *dev, char *p, int size) { int l = 0; diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 0a4282adf3..7da70ff349 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -745,6 +745,9 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) /* Map dev to context-entry then do a paging-structures walk to do a iommu * translation. + * + * Called from RCU critical section. 
+ * * @bus_num: The bus number * @devfn: The devfn, which is the combined of device and function number * @is_write: The access is a write operation diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c index 252ea5eb53..36f73e1f8b 100644 --- a/hw/pci-bridge/pci_bridge_dev.c +++ b/hw/pci-bridge/pci_bridge_dev.c @@ -97,6 +97,11 @@ static void pci_bridge_dev_exitfn(PCIDevice *dev) pci_bridge_exitfn(dev); } +static void pci_bridge_dev_instance_finalize(Object *obj) +{ + shpc_free(PCI_DEVICE(obj)); +} + static void pci_bridge_dev_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { @@ -154,10 +159,11 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data) } static const TypeInfo pci_bridge_dev_info = { - .name = TYPE_PCI_BRIDGE_DEV, - .parent = TYPE_PCI_BRIDGE, - .instance_size = sizeof(PCIBridgeDev), - .class_init = pci_bridge_dev_class_init, + .name = TYPE_PCI_BRIDGE_DEV, + .parent = TYPE_PCI_BRIDGE, + .instance_size = sizeof(PCIBridgeDev), + .class_init = pci_bridge_dev_class_init, + .instance_finalize = pci_bridge_dev_instance_finalize, .interfaces = (InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, { } diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index f573875baf..832b6c7248 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -205,6 +205,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &is->iommu_as; } +/* Called from RCU critical section */ static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) { diff --git a/hw/pci/pcie_host.c b/hw/pci/pcie_host.c index dfb4a2b505..d8afba863e 100644 --- a/hw/pci/pcie_host.c +++ b/hw/pci/pcie_host.c @@ -88,6 +88,8 @@ static void pcie_host_init(Object *obj) PCIExpressHost *e = PCIE_HOST_BRIDGE(obj); e->base_addr = PCIE_BASE_ADDR_UNMAPPED; + memory_region_init_io(&e->mmio, OBJECT(e), &pcie_mmcfg_ops, e, "pcie-mmcfg-mmio", + PCIE_MMCFG_SIZE_MAX); } void pcie_host_mmcfg_unmap(PCIExpressHost *e) 
@@ -104,8 +106,7 @@ void pcie_host_mmcfg_init(PCIExpressHost *e, uint32_t size) assert(size >= PCIE_MMCFG_SIZE_MIN); assert(size <= PCIE_MMCFG_SIZE_MAX); e->size = size; - memory_region_init_io(&e->mmio, OBJECT(e), &pcie_mmcfg_ops, e, - "pcie-mmcfg", e->size); + memory_region_set_size(&e->mmio, e->size); } void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr, @@ -121,10 +122,12 @@ void pcie_host_mmcfg_update(PCIExpressHost *e, hwaddr addr, uint32_t size) { + memory_region_transaction_begin(); pcie_host_mmcfg_unmap(e); if (enable) { pcie_host_mmcfg_map(e, addr, size); } + memory_region_transaction_commit(); } static const TypeInfo pcie_host_type_info = { diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 27c496e8c3..5fd7f4bbb7 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -663,13 +663,22 @@ void shpc_cleanup(PCIDevice *d, MemoryRegion *bar) SHPCDevice *shpc = d->shpc; d->cap_present &= ~QEMU_PCI_CAP_SHPC; memory_region_del_subregion(bar, &shpc->mmio); - object_unparent(OBJECT(&shpc->mmio)); /* TODO: cleanup config space changes? 
*/ +} + +void shpc_free(PCIDevice *d) +{ + SHPCDevice *shpc = d->shpc; + if (!shpc) { + return; + } + object_unparent(OBJECT(&shpc->mmio)); g_free(shpc->config); g_free(shpc->cmask); g_free(shpc->wmask); g_free(shpc->w1cmask); g_free(shpc); + d->shpc = NULL; } void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index da474740c0..ba003da39e 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -59,6 +59,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) return NULL; } +/* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) { diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index dcb2bc5a6e..e30ff84c0c 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -24,6 +24,7 @@ #include "hw/virtio/virtio-scsi.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" +#include "hw/fw-path-provider.h" /* Features supported by host kernel. */ static const int kernel_feature_bits[] = { @@ -250,6 +251,12 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) return; } + /* At present, channel and lun both are 0 for bootable vhost-scsi disk */ + s->channel = 0; + s->lun = 0; + /* Note: we can also get the minimum tpgt from kernel */ + s->target = vs->conf.boot_tpgt; + error_setg(&s->migration_blocker, "vhost-scsi does not support migration"); migrate_add_blocker(s->migration_blocker); @@ -271,6 +278,19 @@ static void vhost_scsi_unrealize(DeviceState *dev, Error **errp) virtio_scsi_common_unrealize(dev, errp); } +/* + * Implementation of an interface to adjust firmware path + * for the bootindex property handling. 
+ */ +static char *vhost_scsi_get_fw_dev_path(FWPathProvider *p, BusState *bus, + DeviceState *dev) +{ + VHostSCSI *s = VHOST_SCSI(dev); + /* format: channel@channel/vhost-scsi@target,lun */ + return g_strdup_printf("channel@%x/%s@%x,%x", s->channel, + qdev_fw_name(dev), s->target, s->lun); +} + static Property vhost_scsi_properties[] = { DEFINE_VHOST_SCSI_PROPERTIES(VHostSCSI, parent_obj.conf), DEFINE_PROP_END_OF_LIST(), @@ -280,6 +300,7 @@ static void vhost_scsi_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(klass); dc->props = vhost_scsi_properties; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -288,6 +309,15 @@ static void vhost_scsi_class_init(ObjectClass *klass, void *data) vdc->get_features = vhost_scsi_get_features; vdc->set_config = vhost_scsi_set_config; vdc->set_status = vhost_scsi_set_status; + fwc->get_dev_path = vhost_scsi_get_fw_dev_path; +} + +static void vhost_scsi_instance_init(Object *obj) +{ + VHostSCSI *dev = VHOST_SCSI(obj); + + device_add_bootindex_property(obj, &dev->bootindex, "bootindex", NULL, + DEVICE(dev), NULL); } static const TypeInfo vhost_scsi_info = { @@ -295,6 +325,11 @@ static const TypeInfo vhost_scsi_info = { .parent = TYPE_VIRTIO_SCSI_COMMON, .instance_size = sizeof(VHostSCSI), .class_init = vhost_scsi_class_init, + .instance_init = vhost_scsi_instance_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_FW_PATH_PROVIDER }, + { } + }, }; static void virtio_register_types(void) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index dde1d73b56..604cb5b749 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1238,6 +1238,8 @@ static void vhost_scsi_pci_instance_init(Object *obj) virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), TYPE_VHOST_SCSI); + object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex", &error_abort); } 
static const TypeInfo vhost_scsi_pci_info = { diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 2c4828694b..ac06c6721c 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -24,6 +24,7 @@ #include "exec/memory.h" #include "qemu/thread.h" #include "qom/cpu.h" +#include "qemu/rcu.h" /* some important defines: * @@ -268,6 +269,7 @@ CPUArchState *cpu_copy(CPUArchState *env); typedef struct RAMBlock RAMBlock; struct RAMBlock { + struct rcu_head rcu; struct MemoryRegion *mr; uint8_t *host; ram_addr_t offset; @@ -275,11 +277,10 @@ struct RAMBlock { ram_addr_t max_length; void (*resized)(const char*, uint64_t length, void *host); uint32_t flags; + /* Protected by iothread lock. */ char idstr[256]; - /* Reads can take either the iothread or the ramlist lock. - * Writes must take both locks. - */ - QTAILQ_ENTRY(RAMBlock) next; + /* RCU-enabled, writes protected by the ramlist lock */ + QLIST_ENTRY(RAMBlock) next; int fd; }; @@ -295,8 +296,8 @@ typedef struct RAMList { /* Protected by the iothread lock. */ unsigned long *dirty_memory[DIRTY_MEMORY_NUM]; RAMBlock *mru_block; - /* Protected by the ramlist lock. */ - QTAILQ_HEAD(, RAMBlock) blocks; + /* RCU-enabled, writes protected by the ramlist lock. 
*/ + QLIST_HEAD(, RAMBlock) blocks; uint32_t version; } RAMList; extern RAMList ram_list; diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h index b8ecd6f68d..e0da9d7ad3 100644 --- a/include/exec/cputlb.h +++ b/include/exec/cputlb.h @@ -34,7 +34,7 @@ extern int tlb_flush_count; void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr); MemoryRegionSection * -address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat, +address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr, hwaddr *xlat, hwaddr *plen); hwaddr memory_region_section_get_iotlb(CPUState *cpu, MemoryRegionSection *section, diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 6a154485ba..8eb0db3910 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -96,6 +96,8 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access); #if !defined(CONFIG_USER_ONLY) +bool qemu_in_vcpu_thread(void); +void cpu_reload_memory_map(CPUState *cpu); void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as); /* cputlb.c */ void tlb_flush_page(CPUState *cpu, target_ulong addr); @@ -337,7 +339,8 @@ extern uintptr_t tci_tb_ptr; void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align)); -struct MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index); +struct MemoryRegion *iotlb_to_region(CPUState *cpu, + hwaddr index); bool io_mem_read(struct MemoryRegion *mr, hwaddr addr, uint64_t *pvalue, unsigned size); bool io_mem_write(struct MemoryRegion *mr, hwaddr addr, diff --git a/include/hw/pci/shpc.h b/include/hw/pci/shpc.h index 025bc5b268..9bbea39996 100644 --- a/include/hw/pci/shpc.h +++ b/include/hw/pci/shpc.h @@ -41,6 +41,7 @@ void shpc_reset(PCIDevice *d); int shpc_bar_size(PCIDevice *dev); int shpc_init(PCIDevice *dev, PCIBus *sec_bus, MemoryRegion *bar, unsigned off); void shpc_cleanup(PCIDevice *dev, MemoryRegion *bar); +void 
shpc_free(PCIDevice *dev); void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len); diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 15a226f24a..4e673f9d29 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -342,6 +342,7 @@ void qbus_reset_all_fn(void *opaque); BusState *sysbus_get_default(void); char *qdev_get_fw_dev_path(DeviceState *dev); +char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev); /** * @qdev_machine_init diff --git a/include/hw/virtio/vhost-scsi.h b/include/hw/virtio/vhost-scsi.h index 85cc031281..dea0075626 100644 --- a/include/hw/virtio/vhost-scsi.h +++ b/include/hw/virtio/vhost-scsi.h @@ -60,11 +60,16 @@ typedef struct VHostSCSI { Error *migration_blocker; struct vhost_dev dev; + int32_t bootindex; + int channel; + int target; + int lun; } VHostSCSI; #define DEFINE_VHOST_SCSI_PROPERTIES(_state, _conf_field) \ DEFINE_PROP_STRING("vhostfd", _state, _conf_field.vhostfd), \ DEFINE_PROP_STRING("wwpn", _state, _conf_field.wwpn), \ + DEFINE_PROP_UINT32("boot_tpgt", _state, _conf_field.boot_tpgt, 0), \ DEFINE_PROP_UINT32("num_queues", _state, _conf_field.num_queues, 1), \ DEFINE_PROP_UINT32("max_sectors", _state, _conf_field.max_sectors, 0xFFFF), \ DEFINE_PROP_UINT32("cmd_per_lun", _state, _conf_field.cmd_per_lun, 128) diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index bf17cc9ea5..c122e7ae5c 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -153,6 +153,7 @@ struct VirtIOSCSIConf { uint32_t cmd_per_lun; char *vhostfd; char *wwpn; + uint32_t boot_tpgt; IOThread *iothread; }; diff --git a/include/qemu/queue.h b/include/qemu/queue.h index c602797652..80941506ce 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -139,17 +139,6 @@ struct { \ (elm)->field.le_prev = &(head)->lh_first; \ } while (/*CONSTCOND*/0) -#define QLIST_INSERT_HEAD_RCU(head, elm, field) do { \ - (elm)->field.le_prev = 
&(head)->lh_first; \ - (elm)->field.le_next = (head)->lh_first; \ - smp_wmb(); /* fill elm before linking it */ \ - if ((head)->lh_first != NULL) { \ - (head)->lh_first->field.le_prev = &(elm)->field.le_next; \ - } \ - (head)->lh_first = (elm); \ - smp_wmb(); \ -} while (/* CONSTCOND*/0) - #define QLIST_REMOVE(elm, field) do { \ if ((elm)->field.le_next != NULL) \ (elm)->field.le_next->field.le_prev = \ diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index 068a279a79..506ab58eaf 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -140,6 +140,14 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func); }), \ (RCUCBFunc *)(func)) +#define g_free_rcu(obj, field) \ + call_rcu1(({ \ + char __attribute__((unused)) \ + offset_must_be_zero[-offsetof(typeof(*(obj)), field)]; \ + &(obj)->field; \ + }), \ + (RCUCBFunc *)g_free); + #ifdef __cplusplus } #endif diff --git a/include/qemu/rcu_queue.h b/include/qemu/rcu_queue.h new file mode 100644 index 0000000000..3aca7a57e3 --- /dev/null +++ b/include/qemu/rcu_queue.h @@ -0,0 +1,134 @@ +#ifndef QEMU_RCU_QUEUE_H +#define QEMU_RCU_QUEUE_H + +/* + * rcu_queue.h + * + * RCU-friendly versions of the queue.h primitives. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Copyright (c) 2013 Mike D. 
Day, IBM Corporation. + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include "qemu/queue.h" +#include "qemu/atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * List access methods. + */ +#define QLIST_EMPTY_RCU(head) (atomic_rcu_read(&(head)->lh_first) == NULL) +#define QLIST_FIRST_RCU(head) (atomic_rcu_read(&(head)->lh_first)) +#define QLIST_NEXT_RCU(elm, field) (atomic_rcu_read(&(elm)->field.le_next)) + +/* + * List functions. + */ + + +/* + * The difference between atomic_read/set and atomic_rcu_read/set + * is in the including of a read/write memory barrier to the volatile + * access. atomic_rcu_* macros include the memory barrier, the + * plain atomic macros do not. Therefore, it should be correct to + * issue a series of reads or writes to the same element using only + * the atomic_* macro, until the last read or write, which should be + * atomic_rcu_* to introduce a read or write memory barrier as + * appropriate. + */ + +/* Upon publication of the listelm->next value, list readers + * will see the new node when following next pointers from + * antecedent nodes, but may not see the new node when following + * prev pointers from subsequent nodes until after the RCU grace + * period expires. + * see linux/include/rculist.h __list_add_rcu(new, prev, next) + */ +#define QLIST_INSERT_AFTER_RCU(listelm, elm, field) do { \ + (elm)->field.le_next = (listelm)->field.le_next; \ + (elm)->field.le_prev = &(listelm)->field.le_next; \ + atomic_rcu_set(&(listelm)->field.le_next, (elm)); \ + if ((elm)->field.le_next != NULL) { \ + (elm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + } \ +} while (/*CONSTCOND*/0) + +/* Upon publication of the listelm->prev->next value, list + * readers will see the new element when following prev pointers + * from subsequent elements, but may not see the new element + * when following next pointers from antecedent elements + * until after the RCU grace period expires. 
+ */ +#define QLIST_INSERT_BEFORE_RCU(listelm, elm, field) do { \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + (elm)->field.le_next = (listelm); \ + atomic_rcu_set((listelm)->field.le_prev, (elm)); \ + (listelm)->field.le_prev = &(elm)->field.le_next; \ +} while (/*CONSTCOND*/0) + +/* Upon publication of the head->first value, list readers + * will see the new element when following the head, but may + * not see the new element when following prev pointers from + * subsequent elements until after the RCU grace period has + * expired. + */ +#define QLIST_INSERT_HEAD_RCU(head, elm, field) do { \ + (elm)->field.le_prev = &(head)->lh_first; \ + (elm)->field.le_next = (head)->lh_first; \ + atomic_rcu_set((&(head)->lh_first), (elm)); \ + if ((elm)->field.le_next != NULL) { \ + (elm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + } \ +} while (/*CONSTCOND*/0) + + +/* prior to publication of the elm->prev->next value, some list + * readers may still see the removed element when following + * the antecedent's next pointer. + */ +#define QLIST_REMOVE_RCU(elm, field) do { \ + if ((elm)->field.le_next != NULL) { \ + (elm)->field.le_next->field.le_prev = \ + (elm)->field.le_prev; \ + } \ + *(elm)->field.le_prev = (elm)->field.le_next; \ +} while (/*CONSTCOND*/0) + +/* List traversal must occur within an RCU critical section. */ +#define QLIST_FOREACH_RCU(var, head, field) \ + for ((var) = atomic_rcu_read(&(head)->lh_first); \ + (var); \ + (var) = atomic_rcu_read(&(var)->field.le_next)) + +/* List traversal must occur within an RCU critical section. 
*/ +#define QLIST_FOREACH_SAFE_RCU(var, head, field, next_var) \ + for ((var) = (atomic_rcu_read(&(head)->lh_first)); \ + (var) && \ + ((next_var) = atomic_rcu_read(&(var)->field.le_next), 1); \ + (var) = (next_var)) + +#ifdef __cplusplus +} +#endif +#endif /* QEMU_RCU_QUEUE_H */ diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 2098f1cb50..48fd6fb1d2 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -256,6 +256,7 @@ struct CPUState { sigjmp_buf jmp_env; AddressSpace *as; + struct AddressSpaceDispatch *memory_dispatch; MemoryListener *tcg_as_listener; void *env_ptr; /* CPUArchState */ @@ -1943,6 +1943,7 @@ void memory_listener_unregister(MemoryListener *listener) void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) { + memory_region_ref(root); memory_region_transaction_begin(); as->root = root; as->current_map = g_new(FlatView, 1); @@ -1969,10 +1970,13 @@ static void do_address_space_destroy(AddressSpace *as) flatview_unref(as->current_map); g_free(as->name); g_free(as->ioeventfds); + memory_region_unref(as->root); } void address_space_destroy(AddressSpace *as) { + MemoryRegion *root = as->root; + /* Flush out anything from MemoryListeners listening in on this */ memory_region_transaction_begin(); as->root = NULL; @@ -1984,6 +1988,7 @@ void address_space_destroy(AddressSpace *as) * entries that the guest should never use. Wait for the old * values to expire before freeing the data. 
*/ + as->root = root; call_rcu(as, do_address_space_destroy, rcu); } diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 1ed8b67883..dc8e44acf8 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -108,16 +108,16 @@ shape and this command should mostly work.""" assert (val["hi"] == 0) return val["lo"] - def qtailq_foreach(self, head, field_str): - var_p = head["tqh_first"] + def qlist_foreach(self, head, field_str): + var_p = head["lh_first"] while (var_p != 0): var = var_p.dereference() yield var - var_p = var[field_str]["tqe_next"] + var_p = var[field_str]["le_next"] def qemu_get_ram_block(self, ram_addr): ram_blocks = gdb.parse_and_eval("ram_list.blocks") - for block in self.qtailq_foreach(ram_blocks, "next"): + for block in self.qlist_foreach(ram_blocks, "next"): if (ram_addr - block["offset"] < block["length"]): return block raise gdb.GdbError("Bad ram offset %x" % ram_addr) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index c0c4ff0de3..c65cabda5a 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -145,6 +145,45 @@ svm_exit_reasons = { 0x400: 'NPF', } +# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h) +aarch64_exit_reasons = { + 0x00: 'UNKNOWN', + 0x01: 'WFI', + 0x03: 'CP15_32', + 0x04: 'CP15_64', + 0x05: 'CP14_MR', + 0x06: 'CP14_LS', + 0x07: 'FP_ASIMD', + 0x08: 'CP10_ID', + 0x0C: 'CP14_64', + 0x0E: 'ILL_ISS', + 0x11: 'SVC32', + 0x12: 'HVC32', + 0x13: 'SMC32', + 0x15: 'SVC64', + 0x16: 'HVC64', + 0x17: 'SMC64', + 0x18: 'SYS64', + 0x20: 'IABT', + 0x21: 'IABT_HYP', + 0x22: 'PC_ALIGN', + 0x24: 'DABT', + 0x25: 'DABT_HYP', + 0x26: 'SP_ALIGN', + 0x28: 'FP_EXC32', + 0x2C: 'FP_EXC64', + 0x2F: 'SERROR', + 0x30: 'BREAKPT', + 0x31: 'BREAKPT_HYP', + 0x32: 'SOFTSTP', + 0x33: 'SOFTSTP_HYP', + 0x34: 'WATCHPT', + 0x35: 'WATCHPT_HYP', + 0x38: 'BKPT32', + 0x3A: 'VECTOR32', + 0x3C: 'BRK64', +} + # From include/uapi/linux/kvm.h, KVM_EXIT_xxx userspace_exit_reasons = { 0: 'UNKNOWN', @@ 
-212,7 +251,8 @@ def ppc_init(): def aarch64_init(): globals().update({ - 'sc_perf_evt_open' : 241 + 'sc_perf_evt_open' : 241, + 'exit_reasons' : aarch64_exit_reasons, }) def detect_platform(): diff --git a/softmmu_template.h b/softmmu_template.h index 6b4e615dbf..0e3dd35fe1 100644 --- a/softmmu_template.h +++ b/softmmu_template.h @@ -149,7 +149,7 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, { uint64_t val; CPUState *cpu = ENV_GET_CPU(env); - MemoryRegion *mr = iotlb_to_region(cpu->as, physaddr); + MemoryRegion *mr = iotlb_to_region(cpu, physaddr); physaddr = (physaddr & TARGET_PAGE_MASK) + addr; cpu->mem_io_pc = retaddr; @@ -369,7 +369,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, uintptr_t retaddr) { CPUState *cpu = ENV_GET_CPU(env); - MemoryRegion *mr = iotlb_to_region(cpu->as, physaddr); + MemoryRegion *mr = iotlb_to_region(cpu, physaddr); physaddr = (physaddr & TARGET_PAGE_MASK) + addr; if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu_can_do_io(cpu)) { diff --git a/tests/Makefile b/tests/Makefile index 53a4c30641..307035c26c 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -62,6 +62,8 @@ check-unit-y += tests/test-int128$(EXESUF) gcov-files-test-int128-y = check-unit-y += tests/rcutorture$(EXESUF) gcov-files-rcutorture-y = util/rcu.c +check-unit-y += tests/test-rcu-list$(EXESUF) +gcov-files-test-rcu-list-y = util/rcu.c check-unit-y += tests/test-bitops$(EXESUF) check-unit-$(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) += tests/test-qdev-global-props$(EXESUF) check-unit-y += tests/check-qom-interface$(EXESUF) @@ -228,7 +230,7 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \ tests/test-qmp-commands.o tests/test-visitor-serialization.o \ tests/test-x86-cpuid.o tests/test-mul64.o tests/test-int128.o \ tests/test-opts-visitor.o tests/test-qmp-event.o \ - tests/rcutorture.o + tests/rcutorture.o tests/test-rcu-list.o test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o \ 
tests/test-qapi-event.o @@ -257,7 +259,8 @@ tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o tests/test-xbzrle$(EXESUF): tests/test-xbzrle.o migration/xbzrle.o page_cache.o libqemuutil.a tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o tests/test-int128$(EXESUF): tests/test-int128.o -tests/rcutorture$(EXESUF): tests/rcutorture.o libqemuutil.a +tests/rcutorture$(EXESUF): tests/rcutorture.o libqemuutil.a libqemustub.a +tests/test-rcu-list$(EXESUF): tests/test-rcu-list.o libqemuutil.a libqemustub.a tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\ diff --git a/tests/test-rcu-list.c b/tests/test-rcu-list.c new file mode 100644 index 0000000000..46b5e263e5 --- /dev/null +++ b/tests/test-rcu-list.c @@ -0,0 +1,306 @@ +/* + * test-rcu-list.c + * + * usage: test-rcu-list <duration> <readers> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (c) 2013 Mike D. Day, IBM Corporation. + */ + +#include <glib.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "qemu/atomic.h" +#include "qemu/rcu.h" +#include "qemu/compiler.h" +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "qemu/rcu_queue.h" + +/* + * Test variables. 
+ */ + +long long n_reads = 0LL; +long long n_updates = 0LL; +long long n_reclaims = 0LL; +long long n_nodes_removed = 0LL; +long long n_nodes = 0LL; +int g_test_in_charge = 0; + +int nthreadsrunning; + +char argsbuf[64]; + +#define GOFLAG_INIT 0 +#define GOFLAG_RUN 1 +#define GOFLAG_STOP 2 + +static volatile int goflag = GOFLAG_INIT; + +#define RCU_READ_RUN 1000 +#define RCU_UPDATE_RUN 10 +#define NR_THREADS 100 +#define RCU_Q_LEN 100 + +static QemuThread threads[NR_THREADS]; +static struct rcu_reader_data *data[NR_THREADS]; +static int n_threads; + +static int select_random_el(int max) +{ + return (rand() % max); +} + + +static void create_thread(void *(*func)(void *)) +{ + if (n_threads >= NR_THREADS) { + fprintf(stderr, "Thread limit of %d exceeded!\n", NR_THREADS); + exit(-1); + } + qemu_thread_create(&threads[n_threads], "test", func, &data[n_threads], + QEMU_THREAD_JOINABLE); + n_threads++; +} + +static void wait_all_threads(void) +{ + int i; + + for (i = 0; i < n_threads; i++) { + qemu_thread_join(&threads[i]); + } + n_threads = 0; +} + + +struct list_element { + QLIST_ENTRY(list_element) entry; + struct rcu_head rcu; + long long val; +}; + +static void reclaim_list_el(struct rcu_head *prcu) +{ + struct list_element *el = container_of(prcu, struct list_element, rcu); + g_free(el); + atomic_add(&n_reclaims, 1); +} + +static QLIST_HEAD(q_list_head, list_element) Q_list_head; + +static void *rcu_q_reader(void *arg) +{ + long long j, n_reads_local = 0; + struct list_element *el; + + *(struct rcu_reader_data **)arg = &rcu_reader; + atomic_inc(&nthreadsrunning); + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + + while (goflag == GOFLAG_RUN) { + rcu_read_lock(); + QLIST_FOREACH_RCU(el, &Q_list_head, entry) { + j = atomic_read(&el->val); + (void)j; + n_reads_local++; + if (goflag == GOFLAG_STOP) { + break; + } + } + rcu_read_unlock(); + + g_usleep(100); + } + atomic_add(&n_reads, n_reads_local); + return NULL; +} + + +static void *rcu_q_updater(void *arg) 
+{ + int j, target_el; + long long n_updates_local = 0; + long long n_removed_local = 0; + struct list_element *el, *prev_el; + + *(struct rcu_reader_data **)arg = &rcu_reader; + atomic_inc(&nthreadsrunning); + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + + while (goflag == GOFLAG_RUN) { + target_el = select_random_el(RCU_Q_LEN); + j = 0; + /* FOREACH_RCU could work here but let's use both macros */ + QLIST_FOREACH_SAFE_RCU(prev_el, &Q_list_head, entry, el) { + j++; + if (target_el == j) { + QLIST_REMOVE_RCU(prev_el, entry); + /* may be more than one updater in the future */ + call_rcu1(&prev_el->rcu, reclaim_list_el); + n_removed_local++; + break; + } + } + if (goflag == GOFLAG_STOP) { + break; + } + target_el = select_random_el(RCU_Q_LEN); + j = 0; + QLIST_FOREACH_RCU(el, &Q_list_head, entry) { + j++; + if (target_el == j) { + prev_el = g_new(struct list_element, 1); + atomic_add(&n_nodes, 1); + prev_el->val = atomic_read(&n_nodes); + QLIST_INSERT_BEFORE_RCU(el, prev_el, entry); + break; + } + } + + n_updates_local += 2; + synchronize_rcu(); + } + synchronize_rcu(); + atomic_add(&n_updates, n_updates_local); + atomic_add(&n_nodes_removed, n_removed_local); + return NULL; +} + +static void rcu_qtest_init(void) +{ + struct list_element *new_el; + int i; + nthreadsrunning = 0; + srand(time(0)); + for (i = 0; i < RCU_Q_LEN; i++) { + new_el = g_new(struct list_element, 1); + new_el->val = i; + QLIST_INSERT_HEAD_RCU(&Q_list_head, new_el, entry); + } + atomic_add(&n_nodes, RCU_Q_LEN); +} + +static void rcu_qtest_run(int duration, int nreaders) +{ + int nthreads = nreaders + 1; + while (atomic_read(&nthreadsrunning) < nthreads) { + g_usleep(1000); + } + + goflag = GOFLAG_RUN; + sleep(duration); + goflag = GOFLAG_STOP; + wait_all_threads(); +} + + +static void rcu_qtest(const char *test, int duration, int nreaders) +{ + int i; + long long n_removed_local = 0; + + struct list_element *el, *prev_el; + + rcu_qtest_init(); + for (i = 0; i < nreaders; i++) { + 
create_thread(rcu_q_reader); + } + create_thread(rcu_q_updater); + rcu_qtest_run(duration, nreaders); + + QLIST_FOREACH_SAFE_RCU(prev_el, &Q_list_head, entry, el) { + QLIST_REMOVE_RCU(prev_el, entry); + call_rcu1(&prev_el->rcu, reclaim_list_el); + n_removed_local++; + } + atomic_add(&n_nodes_removed, n_removed_local); + synchronize_rcu(); + while (n_nodes_removed > n_reclaims) { + g_usleep(100); + synchronize_rcu(); + } + if (g_test_in_charge) { + g_assert_cmpint(n_nodes_removed, ==, n_reclaims); + } else { + printf("%s: %d readers; 1 updater; nodes read: " \ + "%lld, nodes removed: %lld; nodes reclaimed: %lld\n", + test, nthreadsrunning - 1, n_reads, n_nodes_removed, n_reclaims); + exit(0); + } +} + +static void usage(int argc, char *argv[]) +{ + fprintf(stderr, "Usage: %s duration nreaders\n", argv[0]); + exit(-1); +} + +static int gtest_seconds; + +static void gtest_rcuq_one(void) +{ + rcu_qtest("rcuqtest", gtest_seconds / 4, 1); +} + +static void gtest_rcuq_few(void) +{ + rcu_qtest("rcuqtest", gtest_seconds / 4, 5); +} + +static void gtest_rcuq_many(void) +{ + rcu_qtest("rcuqtest", gtest_seconds / 2, 20); +} + + +int main(int argc, char *argv[]) +{ + int duration = 0, readers = 0; + + if (argc >= 2) { + if (argv[1][0] == '-') { + g_test_init(&argc, &argv, NULL); + if (g_test_quick()) { + gtest_seconds = 4; + } else { + gtest_seconds = 20; + } + g_test_add_func("/rcu/qlist/single-threaded", gtest_rcuq_one); + g_test_add_func("/rcu/qlist/short-few", gtest_rcuq_few); + g_test_add_func("/rcu/qlist/long-many", gtest_rcuq_many); + g_test_in_charge = 1; + return g_test_run(); + } + duration = strtoul(argv[1], NULL, 0); + } + if (argc >= 3) { + readers = strtoul(argv[2], NULL, 0); + } + if (duration && readers) { + rcu_qtest(argv[0], duration, readers); + return 0; + } + + usage(argc, argv); + return -1; +} diff --git a/util/rcu.c b/util/rcu.c index c9c3e6e4ab..bd73b8eb47 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -35,6 +35,7 @@ #include "qemu/rcu.h" #include 
"qemu/atomic.h" #include "qemu/thread.h" +#include "qemu/main-loop.h" /* * Global grace period counter. Bit 0 is always one in rcu_gp_ctr. @@ -223,32 +224,38 @@ static void *call_rcu_thread(void *opaque) * Fetch rcu_call_count now, we only must process elements that were * added before synchronize_rcu() starts. */ - while (n < RCU_CALL_MIN_SIZE && ++tries <= 5) { - g_usleep(100000); - qemu_event_reset(&rcu_call_ready_event); - n = atomic_read(&rcu_call_count); - if (n < RCU_CALL_MIN_SIZE) { - qemu_event_wait(&rcu_call_ready_event); + while (n == 0 || (n < RCU_CALL_MIN_SIZE && ++tries <= 5)) { + g_usleep(10000); + if (n == 0) { + qemu_event_reset(&rcu_call_ready_event); n = atomic_read(&rcu_call_count); + if (n == 0) { + qemu_event_wait(&rcu_call_ready_event); + } } + n = atomic_read(&rcu_call_count); } atomic_sub(&rcu_call_count, n); synchronize_rcu(); + qemu_mutex_lock_iothread(); while (n > 0) { node = try_dequeue(); while (!node) { + qemu_mutex_unlock_iothread(); qemu_event_reset(&rcu_call_ready_event); node = try_dequeue(); if (!node) { qemu_event_wait(&rcu_call_ready_event); node = try_dequeue(); } + qemu_mutex_lock_iothread(); } n--; node->func(node); } + qemu_mutex_unlock_iothread(); } abort(); } |