38 files changed, 748 insertions, 124 deletions
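A recurring cleanup in this series replaces the open-coded sizeof(((type *)0)->field) idiom with a sizeof_field() macro added to include/qemu/compiler.h; see the translate-all.c, xenfb.c, rocker, virtio-net, xen ring.h, target/i386 kvm.c and arch_dump hunks below. Here is a minimal standalone sketch of why the idiom works; struct example and the program around it are illustrative, not part of the series:

    #include <stdio.h>
    #include <stdint.h>

    /* Same definition the series adds to include/qemu/compiler.h.
     * The (type *)0 is never dereferenced: sizeof only inspects the
     * type of its operand, at compile time. */
    #define sizeof_field(type, field) sizeof(((type *)0)->field)

    struct example {            /* illustrative, not from QEMU */
        uint32_t magic;
        uint64_t offset[16];
    };

    int main(void)
    {
        printf("magic:     %zu\n", sizeof_field(struct example, magic));     /* 4 */
        printf("offset:    %zu\n", sizeof_field(struct example, offset));    /* 128 */
        printf("offset[0]: %zu\n", sizeof_field(struct example, offset[0])); /* 8 */
        return 0;
    }

Because no object of the type is needed, the macro also works in constant expressions such as QEMU_BUILD_BUG_ON() and the ELF note size tables touched below.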
diff --git a/MAINTAINERS b/MAINTAINERS index 8c626f6a07..42a1892d6a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -187,7 +187,7 @@ F: disas/microblaze.c MIPS M: Aurelien Jarno <aurelien@aurel32.net> -M: Yongbok Kim <yongbok.kim@mips.com> +M: Aleksandar Markovic <aleksandar.markovic@mips.com> S: Maintained F: target/mips/ F: hw/mips/ @@ -718,7 +718,7 @@ S: Maintained F: hw/mips/mips_malta.c Mipssim -M: Yongbok Kim <yongbok.kim@mips.com> +M: Aleksandar Markovic <aleksandar.markovic@mips.com> S: Odd Fixes F: hw/mips/mips_mipssim.c F: hw/net/mipsnet.c @@ -729,7 +729,7 @@ S: Maintained F: hw/mips/mips_r4k.c Fulong 2E -M: Yongbok Kim <yongbok.kim@mips.com> +M: Aleksandar Markovic <aleksandar.markovic@mips.com> S: Odd Fixes F: hw/mips/mips_fulong2e.c F: hw/isa/vt82c686.c diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index 3f41ef2782..d751bcba48 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -18,30 +18,37 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ +#include "trace/mem.h" + #if DATA_SIZE == 16 # define SUFFIX o # define DATA_TYPE Int128 # define BSWAP bswap128 +# define SHIFT 4 #elif DATA_SIZE == 8 # define SUFFIX q # define DATA_TYPE uint64_t # define SDATA_TYPE int64_t # define BSWAP bswap64 +# define SHIFT 3 #elif DATA_SIZE == 4 # define SUFFIX l # define DATA_TYPE uint32_t # define SDATA_TYPE int32_t # define BSWAP bswap32 +# define SHIFT 2 #elif DATA_SIZE == 2 # define SUFFIX w # define DATA_TYPE uint16_t # define SDATA_TYPE int16_t # define BSWAP bswap16 +# define SHIFT 1 #elif DATA_SIZE == 1 # define SUFFIX b # define DATA_TYPE uint8_t # define SDATA_TYPE int8_t # define BSWAP +# define SHIFT 0 #else # error unsupported data size #endif @@ -52,14 +59,37 @@ # define ABI_TYPE uint32_t #endif +#define ATOMIC_TRACE_RMW do { \ + uint8_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false); \ + \ + trace_guest_mem_before_exec(ENV_GET_CPU(env), addr, info); \ + trace_guest_mem_before_exec(ENV_GET_CPU(env), addr, \ + info | TRACE_MEM_ST); \ + } while (0) + +#define ATOMIC_TRACE_LD do { \ + uint8_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, false); \ + \ + trace_guest_mem_before_exec(ENV_GET_CPU(env), addr, info); \ + } while (0) + +# define ATOMIC_TRACE_ST do { \ + uint8_t info = glue(trace_mem_build_info_no_se, MEND)(SHIFT, true); \ + \ + trace_guest_mem_before_exec(ENV_GET_CPU(env), addr, info); \ + } while (0) + /* Define host-endian atomic operations. Note that END is used within the ATOMIC_NAME macro, and redefined below. 
*/ #if DATA_SIZE == 1 # define END +# define MEND _be /* either le or be would be fine */ #elif defined(HOST_WORDS_BIGENDIAN) # define END _be +# define MEND _be #else # define END _le +# define MEND _le #endif ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, @@ -67,7 +97,10 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, { ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; - DATA_TYPE ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv); + DATA_TYPE ret; + + ATOMIC_TRACE_RMW; + ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv); ATOMIC_MMU_CLEANUP; return ret; } @@ -77,6 +110,8 @@ ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + + ATOMIC_TRACE_LD; __atomic_load(haddr, &val, __ATOMIC_RELAXED); ATOMIC_MMU_CLEANUP; return val; @@ -87,6 +122,8 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, { ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + + ATOMIC_TRACE_ST; __atomic_store(haddr, &val, __ATOMIC_RELAXED); ATOMIC_MMU_CLEANUP; } @@ -96,7 +133,10 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, { ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; - DATA_TYPE ret = atomic_xchg__nocheck(haddr, val); + DATA_TYPE ret; + + ATOMIC_TRACE_RMW; + ret = atomic_xchg__nocheck(haddr, val); ATOMIC_MMU_CLEANUP; return ret; } @@ -107,7 +147,10 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ { \ ATOMIC_MMU_DECLS; \ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ - DATA_TYPE ret = atomic_##X(haddr, val); \ + DATA_TYPE ret; \ + \ + ATOMIC_TRACE_RMW; \ + ret = atomic_##X(haddr, val); \ ATOMIC_MMU_CLEANUP; \ return ret; \ } @@ -126,6 +169,9 @@ GEN_ATOMIC_HELPER(xor_fetch) /* These helpers are, as a whole, full barriers. Within the helper, * the leading barrier is explicit and the trailing barrier is within * cmpxchg primitive. + * + * Trace this load + RMW loop as a single RMW op. This way, regardless + * of CF_PARALLEL's value, we'll trace just a read and a write. */ #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ @@ -134,6 +180,8 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ATOMIC_MMU_DECLS; \ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ XDATA_TYPE cmp, old, new, val = xval; \ + \ + ATOMIC_TRACE_RMW; \ smp_mb(); \ cmp = atomic_read__nocheck(haddr); \ do { \ @@ -158,6 +206,7 @@ GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new) #endif /* DATA SIZE >= 16 */ #undef END +#undef MEND #if DATA_SIZE > 1 @@ -165,8 +214,10 @@ GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new) within the ATOMIC_NAME macro. 
*/ #ifdef HOST_WORDS_BIGENDIAN # define END _le +# define MEND _le #else # define END _be +# define MEND _be #endif ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, @@ -174,7 +225,10 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, { ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; - DATA_TYPE ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); + DATA_TYPE ret; + + ATOMIC_TRACE_RMW; + ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); ATOMIC_MMU_CLEANUP; return BSWAP(ret); } @@ -184,6 +238,8 @@ ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + + ATOMIC_TRACE_LD; __atomic_load(haddr, &val, __ATOMIC_RELAXED); ATOMIC_MMU_CLEANUP; return BSWAP(val); @@ -194,6 +250,8 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, { ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + + ATOMIC_TRACE_ST; val = BSWAP(val); __atomic_store(haddr, &val, __ATOMIC_RELAXED); ATOMIC_MMU_CLEANUP; @@ -204,7 +262,10 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, { ATOMIC_MMU_DECLS; DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; - ABI_TYPE ret = atomic_xchg__nocheck(haddr, BSWAP(val)); + ABI_TYPE ret; + + ATOMIC_TRACE_RMW; + ret = atomic_xchg__nocheck(haddr, BSWAP(val)); ATOMIC_MMU_CLEANUP; return BSWAP(ret); } @@ -215,7 +276,10 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ { \ ATOMIC_MMU_DECLS; \ DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ - DATA_TYPE ret = atomic_##X(haddr, BSWAP(val)); \ + DATA_TYPE ret; \ + \ + ATOMIC_TRACE_RMW; \ + ret = atomic_##X(haddr, BSWAP(val)); \ ATOMIC_MMU_CLEANUP; \ return BSWAP(ret); \ } @@ -232,6 +296,9 @@ GEN_ATOMIC_HELPER(xor_fetch) /* These helpers are, as a whole, full barriers. Within the helper, * the leading barrier is explicit and the trailing barrier is within * cmpxchg primitive. + * + * Trace this load + RMW loop as a single RMW op. This way, regardless + * of CF_PARALLEL's value, we'll trace just a read and a write. 
*/ #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ { \ ATOMIC_MMU_DECLS; \ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ XDATA_TYPE ldo, ldn, old, new, val = xval; \ + \ + ATOMIC_TRACE_RMW; \ smp_mb(); \ ldn = atomic_read__nocheck(haddr); \ do { \ @@ -271,11 +340,17 @@ GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new) #endif /* DATA_SIZE >= 16 */ #undef END +#undef MEND #endif /* DATA_SIZE > 1 */ +#undef ATOMIC_TRACE_ST +#undef ATOMIC_TRACE_LD +#undef ATOMIC_TRACE_RMW + #undef BSWAP #undef ABI_TYPE #undef DATA_TYPE #undef SDATA_TYPE #undef SUFFIX #undef DATA_SIZE +#undef SHIFT diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index f0c3fd4d03..ad997cba28 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -191,7 +191,7 @@ struct page_collection { /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */ QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS > - sizeof(((TranslationBlock *)0)->trace_vcpu_dstate) + sizeof_field(TranslationBlock, trace_vcpu_dstate) * BITS_PER_BYTE); /* diff --git a/block/file-posix.c b/block/file-posix.c index 07bb061fe4..43b963b13e 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -545,11 +545,17 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, #ifdef CONFIG_LINUX_AIO /* Currently Linux does AIO only for files opened with O_DIRECT */ - if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) { - error_setg(errp, "aio=native was specified, but it requires " - "cache.direct=on, which was not specified."); - ret = -EINVAL; - goto fail; + if (s->use_linux_aio) { + if (!(s->open_flags & O_DIRECT)) { + error_setg(errp, "aio=native was specified, but it requires " + "cache.direct=on, which was not specified."); + ret = -EINVAL; + goto fail; + } + if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) { + error_prepend(errp, "Unable to use native AIO: "); + goto fail; + } } #else if (s->use_linux_aio) { @@ -1723,6 +1729,22 @@ static BlockAIOCB *raw_aio_flush(BlockDriverState *bs, return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH); } +static void raw_aio_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ +#ifdef CONFIG_LINUX_AIO + BDRVRawState *s = bs->opaque; + if (s->use_linux_aio) { + Error *local_err = NULL; + if (!aio_setup_linux_aio(new_context, &local_err)) { + error_reportf_err(local_err, "Unable to use native AIO, " + "falling back to thread pool: "); + s->use_linux_aio = false; + } + } +#endif +} + static void raw_close(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; @@ -2601,6 +2623,7 @@ BlockDriver bdrv_file = { .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, + .bdrv_attach_aio_context = raw_aio_attach_aio_context, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, diff --git a/block/linux-aio.c b/block/linux-aio.c index 88b8d55ec7..19eb922fdd 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -15,6 +15,7 @@ #include "block/raw-aio.h" #include "qemu/event_notifier.h" #include "qemu/coroutine.h" +#include "qapi/error.h" #include <libaio.h> @@ -470,16 +471,21 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context) qemu_laio_poll_cb); } -LinuxAioState *laio_init(void) +LinuxAioState *laio_init(Error **errp) { + int rc; LinuxAioState *s; s = g_malloc0(sizeof(*s)); - if (event_notifier_init(&s->e, false) < 0)
{ + rc = event_notifier_init(&s->e, false); + if (rc < 0) { + error_setg_errno(errp, -rc, "failed to initialize event notifier"); goto out_free_state; } - if (io_setup(MAX_EVENTS, &s->ctx) != 0) { + rc = io_setup(MAX_EVENTS, &s->ctx); + if (rc < 0) { + error_setg_errno(errp, -rc, "failed to create Linux AIO context"); goto out_close_efd; } diff --git a/docs/devel/tracing.txt b/docs/devel/tracing.txt index 07abbb345c..6f815ecbd7 100644 --- a/docs/devel/tracing.txt +++ b/docs/devel/tracing.txt @@ -104,6 +104,11 @@ Trace events should use types as follows: * For everything else, use primitive scalar types (char, int, long) with the appropriate signedness. + * Avoid floating point types (float and double) because SystemTap does not + support them. In most cases it is possible to round to an integer type + instead. This may require scaling the value first by multiplying it by 1000 + or the like when digits after the decimal point need to be preserved. + Format strings should reflect the types defined in the trace event. Take special care to use PRId64 and PRIu64 for int64_t and uint64_t types, respectively. This ensures portability between 32- and 64-bit platforms. @@ -1930,7 +1930,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size) return offset; } -unsigned long last_ram_page(void) +static unsigned long last_ram_page(void) { RAMBlock *block; ram_addr_t last = 0; @@ -234,6 +234,8 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->ram->dirty_sync_count); monitor_printf(mon, "page size: %" PRIu64 " kbytes\n", info->ram->page_size >> 10); + monitor_printf(mon, "multifd bytes: %" PRIu64 " kbytes\n", + info->ram->multifd_bytes >> 10); if (info->ram->dirty_pages_rate) { monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n", diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c index f5afcc0358..911291c5c3 100644 --- a/hw/display/xenfb.c +++ b/hw/display/xenfb.c @@ -525,8 +525,8 @@ static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim, int width, int height, int depth, size_t fb_len, int offset, int row_stride) { - size_t mfn_sz = sizeof(*((struct xenfb_page *)0)->pd); - size_t pd_len = sizeof(((struct xenfb_page *)0)->pd) / mfn_sz; + size_t mfn_sz = sizeof_field(struct xenfb_page, pd[0]); + size_t pd_len = sizeof_field(struct xenfb_page, pd) / mfn_sz; size_t fb_pages = pd_len * XC_PAGE_SIZE / mfn_sz; size_t fb_len_max = fb_pages * XC_PAGE_SIZE; int max_width, max_height; diff --git a/hw/mips/boston.c b/hw/mips/boston.c index 52cce19766..14e6f955d2 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -471,8 +471,7 @@ static void boston_mach_init(MachineState *machine) sysbus_mmio_map_overlap(SYS_BUS_DEVICE(s->cps), 0, 0, 1); flash = g_new(MemoryRegion, 1); - memory_region_init_rom_nomigrate(flash, NULL, - "boston.flash", 128 * M_BYTE, &err); + memory_region_init_rom(flash, NULL, "boston.flash", 128 * M_BYTE, &err); memory_region_add_subregion_overlap(sys_mem, 0x18000000, flash, 0); ddr = g_new(MemoryRegion, 1); diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c index 494f84e290..b9d92bf47e 100644 --- a/hw/mips/mips_malta.c +++ b/hw/mips/mips_malta.c @@ -1152,7 +1152,7 @@ void mips_malta_init(MachineState *machine) * handled by an overlapping region as the resulting ROM code subpage * regions are not executable.
*/ - memory_region_init_ram_nomigrate(bios_copy, NULL, "bios.1fc", BIOS_SIZE, + memory_region_init_ram(bios_copy, NULL, "bios.1fc", BIOS_SIZE, &error_fatal); if (!rom_copy(memory_region_get_ram_ptr(bios_copy), FLASH_ADDRESS, BIOS_SIZE)) { diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index 60046720a5..8e347d1ee4 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -104,7 +104,7 @@ typedef struct of_dpa_flow_key { /* Width of key which includes field 'f' in u64s, rounded up */ #define FLOW_KEY_WIDTH(f) \ - DIV_ROUND_UP(offsetof(OfDpaFlowKey, f) + sizeof(((OfDpaFlowKey *)0)->f), \ + DIV_ROUND_UP(offsetof(OfDpaFlowKey, f) + sizeof_field(OfDpaFlowKey, f), \ sizeof(uint64_t)) typedef struct of_dpa_flow_action { diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 90502fca7c..f154756e85 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -46,7 +46,7 @@ * 'container'. */ #define endof(container, field) \ - (offsetof(container, field) + sizeof(((container *)0)->field)) + (offsetof(container, field) + sizeof_field(container, field)) typedef struct VirtIOFeature { uint64_t flags; diff --git a/hw/pci-host/xilinx-pcie.c b/hw/pci-host/xilinx-pcie.c index 044e312dc1..b0a31b917d 100644 --- a/hw/pci-host/xilinx-pcie.c +++ b/hw/pci-host/xilinx-pcie.c @@ -120,9 +120,8 @@ static void xilinx_pcie_host_realize(DeviceState *dev, Error **errp) memory_region_init(&s->mmio, OBJECT(s), "mmio", UINT64_MAX); memory_region_set_enabled(&s->mmio, false); - /* dummy I/O region */ - memory_region_init_ram_nomigrate(&s->io, OBJECT(s), "io", 16, NULL); - memory_region_set_enabled(&s->io, false); + /* dummy PCI I/O region (not visible to the CPU) */ + memory_region_init(&s->io, OBJECT(s), "io", 16); /* interrupt out */ qdev_init_gpio_out_named(dev, &s->irq, "interrupt_out", 1); diff --git a/include/block/aio.h b/include/block/aio.h index ae6f354e6c..f08630c6e5 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -381,6 +381,9 @@ GSource *aio_get_g_source(AioContext *ctx); /* Return the ThreadPool bound to this AioContext */ struct ThreadPool *aio_get_thread_pool(AioContext *ctx); +/* Setup the LinuxAioState bound to this AioContext */ +struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp); + /* Return the LinuxAioState bound to this AioContext */ struct LinuxAioState *aio_get_linux_aio(AioContext *ctx); diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index 0e717fd475..8d698ccd31 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -43,7 +43,7 @@ /* linux-aio.c - Linux native implementation */ #ifdef CONFIG_LINUX_AIO typedef struct LinuxAioState LinuxAioState; -LinuxAioState *laio_init(void); +LinuxAioState *laio_init(Error **errp); void laio_cleanup(LinuxAioState *s); int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, uint64_t offset, QEMUIOVector *qiov, int type); diff --git a/include/exec/cpu_ldst_useronly_template.h b/include/exec/cpu_ldst_useronly_template.h index c168f31bba..e30e58ed4a 100644 --- a/include/exec/cpu_ldst_useronly_template.h +++ b/include/exec/cpu_ldst_useronly_template.h @@ -33,20 +33,24 @@ #define SUFFIX q #define USUFFIX q #define DATA_TYPE uint64_t +#define SHIFT 3 #elif DATA_SIZE == 4 #define SUFFIX l #define USUFFIX l #define DATA_TYPE uint32_t +#define SHIFT 2 #elif DATA_SIZE == 2 #define SUFFIX w #define USUFFIX uw #define DATA_TYPE uint16_t #define DATA_STYPE int16_t +#define SHIFT 1 #elif DATA_SIZE == 1 #define SUFFIX b #define 
USUFFIX ub #define DATA_TYPE uint8_t #define DATA_STYPE int8_t +#define SHIFT 0 #else #error unsupported data size #endif @@ -63,7 +67,7 @@ glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr) #if !defined(CODE_ACCESS) trace_guest_mem_before_exec( ENV_GET_CPU(env), ptr, - trace_mem_build_info(DATA_SIZE, false, MO_TE, false)); + trace_mem_build_info(SHIFT, false, MO_TE, false)); #endif return glue(glue(ld, USUFFIX), _p)(g2h(ptr)); } @@ -87,7 +91,7 @@ glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr) #if !defined(CODE_ACCESS) trace_guest_mem_before_exec( ENV_GET_CPU(env), ptr, - trace_mem_build_info(DATA_SIZE, true, MO_TE, false)); + trace_mem_build_info(SHIFT, true, MO_TE, false)); #endif return glue(glue(lds, SUFFIX), _p)(g2h(ptr)); } @@ -113,7 +117,7 @@ glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr, #if !defined(CODE_ACCESS) trace_guest_mem_before_exec( ENV_GET_CPU(env), ptr, - trace_mem_build_info(DATA_SIZE, false, MO_TE, true)); + trace_mem_build_info(SHIFT, false, MO_TE, true)); #endif glue(glue(st, SUFFIX), _p)(g2h(ptr), v); } @@ -136,3 +140,4 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, #undef SUFFIX #undef USUFFIX #undef DATA_SIZE +#undef SHIFT diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index cf2446a176..33c361cad5 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -71,7 +71,6 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, } long qemu_getrampagesize(void); -unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, bool share, const char *mem_path, Error **errp); diff --git a/include/hw/xen/io/ring.h b/include/hw/xen/io/ring.h index abbca47687..ffa3ebadc8 100644 --- a/include/hw/xen/io/ring.h +++ b/include/hw/xen/io/ring.h @@ -65,7 +65,7 @@ typedef unsigned int RING_IDX; */ #define __CONST_RING_SIZE(_s, _sz) \ (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ - sizeof(((struct _s##_sring *)0)->ring[0]))) + sizeof_field(struct _s##_sring, ring[0]))) /* * The same for passing in an actual pointer instead of a name tag. */ diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 9f762695d1..5843812710 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -64,6 +64,8 @@ (type *) ((char *) __mptr - offsetof(type, member));}) #endif +#define sizeof_field(type, field) sizeof(((type *)0)->field) + /* Convert from a base type to a parent type, with compile time checking. 
*/ #ifdef __GNUC__ #define DO_UPCAST(type, field, dev) ( __extension__ ( { \ diff --git a/migration/migration.c b/migration/migration.c index e1eaa97df4..94d71f8b24 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -518,11 +518,12 @@ void migration_ioc_process_incoming(QIOChannel *ioc) */ bool migration_has_all_channels(void) { + MigrationIncomingState *mis = migration_incoming_get_current(); bool all_channels; all_channels = multifd_recv_all_channels_created(); - return all_channels; + return all_channels && mis->from_src_file != NULL; } /* @@ -708,6 +709,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) info->ram->dirty_sync_count = ram_counters.dirty_sync_count; info->ram->postcopy_requests = ram_counters.postcopy_requests; info->ram->page_size = qemu_target_page_size(); + info->ram->multifd_bytes = ram_counters.multifd_bytes; if (migrate_use_xbzrle()) { info->has_xbzrle_cache = true; @@ -2704,10 +2706,17 @@ static MigThrError migration_detect_error(MigrationState *s) } } +/* How many bytes have we transferred since the beginning of the migration */ +static uint64_t migration_total_bytes(MigrationState *s) +{ + return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes; +} + static void migration_calculate_complete(MigrationState *s) { - uint64_t bytes = qemu_ftell(s->to_dst_file); + uint64_t bytes = migration_total_bytes(s); int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + int64_t transfer_time; s->total_time = end_time - s->start_time; if (!s->downtime) { @@ -2718,8 +2727,9 @@ static void migration_calculate_complete(MigrationState *s) s->downtime = end_time - s->downtime_start; } - if (s->total_time) { - s->mbps = ((double) bytes * 8.0) / s->total_time / 1000; + transfer_time = s->total_time - s->setup_time; + if (transfer_time) { + s->mbps = ((double) bytes * 8.0) / transfer_time / 1000; } } @@ -2727,13 +2737,15 @@ static void migration_update_counters(MigrationState *s, int64_t current_time) { uint64_t transferred, time_spent; + uint64_t current_bytes; /* bytes transferred since the beginning */ double bandwidth; if (current_time < s->iteration_start_time + BUFFER_DELAY) { return; } - transferred = qemu_ftell(s->to_dst_file) - s->iteration_initial_bytes; + current_bytes = migration_total_bytes(s); + transferred = current_bytes - s->iteration_initial_bytes; time_spent = current_time - s->iteration_start_time; bandwidth = (double)transferred / time_spent; s->threshold_size = bandwidth * s->parameters.downtime_limit; @@ -2752,7 +2764,7 @@ static void migration_update_counters(MigrationState *s, qemu_file_reset_rate_limit(s->to_dst_file); s->iteration_start_time = current_time; - s->iteration_initial_bytes = qemu_ftell(s->to_dst_file); + s->iteration_initial_bytes = current_bytes; trace_migrate_transferred(transferred, time_spent, bandwidth, s->threshold_size); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 48e51556a7..932f188949 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -500,7 +500,7 @@ static int cleanup_range(const char *block_name, void *host_addr, * postcopy later; must be called prior to any precopy.
* called from arch_init's similarly named ram_postcopy_incoming_init */ -int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +int postcopy_ram_incoming_init(MigrationIncomingState *mis) { if (qemu_ram_foreach_migratable_block(init_range, NULL)) { return -1; } @@ -1265,7 +1265,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) return false; } -int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +int postcopy_ram_incoming_init(MigrationIncomingState *mis) { error_report("postcopy_ram_incoming_init: No OS support"); return -1; } diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index d900d9c34f..9d55536fd1 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -27,7 +27,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis); * postcopy later; must be called prior to any precopy. * called from ram.c's similarly named ram_postcopy_incoming_init */ -int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages); +int postcopy_ram_incoming_init(MigrationIncomingState *mis); /* * At the end of a migration where postcopy_ram_incoming_init was called. diff --git a/migration/ram.c b/migration/ram.c index cd5f55117d..1cd98d6398 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -55,6 +55,7 @@ #include "sysemu/sysemu.h" #include "qemu/uuid.h" #include "savevm.h" +#include "qemu/iov.h" /***********************************************************/ /* ram save/restore */ @@ -510,6 +511,8 @@ exit: #define MULTIFD_MAGIC 0x11223344U #define MULTIFD_VERSION 1 +#define MULTIFD_FLAG_SYNC (1 << 0) + typedef struct { uint32_t magic; uint32_t version; @@ -518,6 +521,31 @@ typedef struct { } __attribute__((packed)) MultiFDInit_t; typedef struct { + uint32_t magic; + uint32_t version; + uint32_t flags; + uint32_t size; + uint32_t used; + uint64_t packet_num; + char ramblock[256]; + uint64_t offset[]; +} __attribute__((packed)) MultiFDPacket_t; + +typedef struct { + /* number of used pages */ + uint32_t used; + /* number of allocated pages */ + uint32_t allocated; + /* global number of generated multifd packets */ + uint64_t packet_num; + /* offset of each page */ + ram_addr_t *offset; + /* pointer to each page */ + struct iovec *iov; + RAMBlock *block; +} MultiFDPages_t; + +typedef struct { /* this fields are not changed once the thread is created */ /* channel number */ uint8_t id; @@ -535,6 +563,25 @@ typedef struct { bool running; /* should this thread finish */ bool quit; + /* thread has work to do */ + int pending_job; + /* array of pages to send */ + MultiFDPages_t *pages; + /* packet allocated len */ + uint32_t packet_len; + /* pointer to the packet */ + MultiFDPacket_t *packet; + /* multifd flags for each packet */ + uint32_t flags; + /* global number of generated multifd packets */ + uint64_t packet_num; + /* thread local variables */ + /* packets sent through this channel */ + uint64_t num_packets; + /* pages sent through this channel */ + uint64_t num_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; } MultiFDSendParams; typedef struct { @@ -547,14 +594,27 @@ typedef struct { QemuThread thread; /* communication channel */ QIOChannel *c; - /* sem where to wait for more work */ - QemuSemaphore sem; /* this mutex protects the following parameters */ QemuMutex mutex; /* is this channel thread running */ bool running; - /* should this thread finish */ - bool quit; + /* array of pages to receive */ + MultiFDPages_t *pages; + /* packet allocated len */ + uint32_t
packet_len; + /* pointer to the packet */ + MultiFDPacket_t *packet; + /* multifd flags for each packet */ + uint32_t flags; + /* global number of generated multifd packets */ + uint64_t packet_num; + /* thread local variables */ + /* packets received through this channel */ + uint64_t num_packets; + /* pages received through this channel */ + uint64_t num_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; } MultiFDRecvParams; static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp) @@ -619,12 +679,211 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) return msg.id; } +static MultiFDPages_t *multifd_pages_init(size_t size) +{ + MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1); + + pages->allocated = size; + pages->iov = g_new0(struct iovec, size); + pages->offset = g_new0(ram_addr_t, size); + + return pages; +} + +static void multifd_pages_clear(MultiFDPages_t *pages) +{ + pages->used = 0; + pages->allocated = 0; + pages->packet_num = 0; + pages->block = NULL; + g_free(pages->iov); + pages->iov = NULL; + g_free(pages->offset); + pages->offset = NULL; + g_free(pages); +} + +static void multifd_send_fill_packet(MultiFDSendParams *p) +{ + MultiFDPacket_t *packet = p->packet; + int i; + + packet->magic = cpu_to_be32(MULTIFD_MAGIC); + packet->version = cpu_to_be32(MULTIFD_VERSION); + packet->flags = cpu_to_be32(p->flags); + packet->size = cpu_to_be32(migrate_multifd_page_count()); + packet->used = cpu_to_be32(p->pages->used); + packet->packet_num = cpu_to_be64(p->packet_num); + + if (p->pages->block) { + strncpy(packet->ramblock, p->pages->block->idstr, 256); + } + + for (i = 0; i < p->pages->used; i++) { + packet->offset[i] = cpu_to_be64(p->pages->offset[i]); + } +} + +static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) +{ + MultiFDPacket_t *packet = p->packet; + RAMBlock *block; + int i; + + be32_to_cpus(&packet->magic); + if (packet->magic != MULTIFD_MAGIC) { + error_setg(errp, "multifd: received packet " + "magic %x and expected magic %x", + packet->magic, MULTIFD_MAGIC); + return -1; + } + + be32_to_cpus(&packet->version); + if (packet->version != MULTIFD_VERSION) { + error_setg(errp, "multifd: received packet " + "version %d and expected version %d", + packet->version, MULTIFD_VERSION); + return -1; + } + + p->flags = be32_to_cpu(packet->flags); + + be32_to_cpus(&packet->size); + if (packet->size > migrate_multifd_page_count()) { + error_setg(errp, "multifd: received packet " + "with size %d and expected maximum size %d", + packet->size, migrate_multifd_page_count()); + return -1; + } + + p->pages->used = be32_to_cpu(packet->used); + if (p->pages->used > packet->size) { + error_setg(errp, "multifd: received packet " + "with %d pages and expected maximum pages %d", + p->pages->used, packet->size); + return -1; + } + + p->packet_num = be64_to_cpu(packet->packet_num); + + if (p->pages->used) { + /* make sure that ramblock is 0 terminated */ + packet->ramblock[255] = 0; + block = qemu_ram_block_by_name(packet->ramblock); + if (!block) { + error_setg(errp, "multifd: unknown ram block %s", + packet->ramblock); + return -1; + } + } + + for (i = 0; i < p->pages->used; i++) { + ram_addr_t offset = be64_to_cpu(packet->offset[i]); + + if (offset > (block->used_length - TARGET_PAGE_SIZE)) { + error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT + " (max " RAM_ADDR_FMT ")", + offset, block->max_length); + return -1; + } + p->pages->iov[i].iov_base = block->host + offset; + p->pages->iov[i].iov_len = TARGET_PAGE_SIZE; + } +
+ return 0; +} + struct { MultiFDSendParams *params; /* number of created threads */ int count; + /* array of pages to send */ + MultiFDPages_t *pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; + /* global number of generated multifd packets */ + uint64_t packet_num; + /* send channels ready */ + QemuSemaphore channels_ready; } *multifd_send_state; +/* + * How do we use multifd_send_state->pages and channel->pages? + * + * We create a pages array for each channel, and a main one. Each time + * we need to send a batch of pages we exchange the main array with the + * one in the channel that is sending it. There are two reasons for + * that: + * - to avoid doing so many mallocs during migration + * - to make it easier to know what to free at the end of migration + * + * This way we always know who is the owner of each "pages" struct, + * and we don't need any locking. It belongs either to the migration + * thread or to the channel thread. Switching is safe because the + * migration thread holds the channel mutex when changing it, and the + * channel thread must have finished with its own, otherwise + * pending_job couldn't be false. + */ + +static void multifd_send_pages(void) +{ + int i; + static int next_channel; + MultiFDSendParams *p = NULL; /* make gcc happy */ + MultiFDPages_t *pages = multifd_send_state->pages; + uint64_t transferred; + + qemu_sem_wait(&multifd_send_state->channels_ready); + for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { + p = &multifd_send_state->params[i]; + + qemu_mutex_lock(&p->mutex); + if (!p->pending_job) { + p->pending_job++; + next_channel = (i + 1) % migrate_multifd_channels(); + break; + } + qemu_mutex_unlock(&p->mutex); + } + p->pages->used = 0; + + p->packet_num = multifd_send_state->packet_num++; + p->pages->block = NULL; + multifd_send_state->pages = p->pages; + p->pages = pages; + transferred = pages->used * TARGET_PAGE_SIZE + p->packet_len; + ram_counters.multifd_bytes += transferred; + ram_counters.transferred += transferred; + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); +} + +static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) +{ + MultiFDPages_t *pages = multifd_send_state->pages; + + if (!pages->block) { + pages->block = block; + } + + if (pages->block == block) { + pages->offset[pages->used] = offset; + pages->iov[pages->used].iov_base = block->host + offset; + pages->iov[pages->used].iov_len = TARGET_PAGE_SIZE; + pages->used++; + + if (pages->used < pages->allocated) { + return; + } + } + + multifd_send_pages(); + + if (pages->block != block) { + multifd_queue_page(block, offset); + } +} + static void multifd_send_terminate_threads(Error *err) { int i; @@ -670,33 +929,116 @@ int multifd_save_cleanup(Error **errp) p->c = NULL; qemu_mutex_destroy(&p->mutex); qemu_sem_destroy(&p->sem); + qemu_sem_destroy(&p->sem_sync); g_free(p->name); p->name = NULL; - } + multifd_pages_clear(p->pages); + p->pages = NULL; + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; + } + qemu_sem_destroy(&multifd_send_state->channels_ready); + qemu_sem_destroy(&multifd_send_state->sem_sync); g_free(multifd_send_state->params); multifd_send_state->params = NULL; + multifd_pages_clear(multifd_send_state->pages); + multifd_send_state->pages = NULL; g_free(multifd_send_state); multifd_send_state = NULL; return ret; } +static void multifd_send_sync_main(void) +{ + int i; + + if (!migrate_use_multifd()) { + return; + } + if (multifd_send_state->pages->used) { + multifd_send_pages(); + } + for (i = 0; i <
migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + + trace_multifd_send_sync_main_signal(p->id); + + qemu_mutex_lock(&p->mutex); + + p->packet_num = multifd_send_state->packet_num++; + p->flags |= MULTIFD_FLAG_SYNC; + p->pending_job++; + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + + trace_multifd_send_sync_main_wait(p->id); + qemu_sem_wait(&multifd_send_state->sem_sync); + } + trace_multifd_send_sync_main(multifd_send_state->packet_num); +} + static void *multifd_send_thread(void *opaque) { MultiFDSendParams *p = opaque; Error *local_err = NULL; + int ret; + + trace_multifd_send_thread_start(p->id); if (multifd_send_initial_packet(p, &local_err) < 0) { goto out; } + /* initial packet */ + p->num_packets = 1; while (true) { + qemu_sem_wait(&p->sem); qemu_mutex_lock(&p->mutex); - if (p->quit) { + + if (p->pending_job) { + uint32_t used = p->pages->used; + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; + + multifd_send_fill_packet(p); + p->flags = 0; + p->num_packets++; + p->num_pages += used; + p->pages->used = 0; + qemu_mutex_unlock(&p->mutex); + + trace_multifd_send(p->id, packet_num, used, flags); + + ret = qio_channel_write_all(p->c, (void *)p->packet, + p->packet_len, &local_err); + if (ret != 0) { + break; + } + + ret = qio_channel_writev_all(p->c, p->pages->iov, used, &local_err); + if (ret != 0) { + break; + } + + qemu_mutex_lock(&p->mutex); + p->pending_job--; + qemu_mutex_unlock(&p->mutex); + + if (flags & MULTIFD_FLAG_SYNC) { + qemu_sem_post(&multifd_send_state->sem_sync); + } + qemu_sem_post(&multifd_send_state->channels_ready); + } else if (p->quit) { qemu_mutex_unlock(&p->mutex); break; + } else { + qemu_mutex_unlock(&p->mutex); + /* sometimes there are spurious wakeups */ } - qemu_mutex_unlock(&p->mutex); - qemu_sem_wait(&p->sem); } out: @@ -708,6 +1050,8 @@ out: p->running = false; qemu_mutex_unlock(&p->mutex); + trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages); + return NULL; } @@ -735,6 +1079,7 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) int multifd_save_setup(void) { int thread_count; + uint32_t page_count = migrate_multifd_page_count(); uint8_t i; if (!migrate_use_multifd()) { @@ -744,13 +1089,23 @@ int multifd_save_setup(void) multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); atomic_set(&multifd_send_state->count, 0); + multifd_send_state->pages = multifd_pages_init(page_count); + qemu_sem_init(&multifd_send_state->sem_sync, 0); + qemu_sem_init(&multifd_send_state->channels_ready, 0); + for (i = 0; i < thread_count; i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; qemu_mutex_init(&p->mutex); qemu_sem_init(&p->sem, 0); + qemu_sem_init(&p->sem_sync, 0); p->quit = false; + p->pending_job = 0; p->id = i; + p->pages = multifd_pages_init(page_count); + p->packet_len = sizeof(MultiFDPacket_t) + + sizeof(ram_addr_t) * page_count; + p->packet = g_malloc0(p->packet_len); p->name = g_strdup_printf("multifdsend_%d", i); socket_send_channel_create(multifd_new_send_channel_async, p); } @@ -761,6 +1116,10 @@ struct { MultiFDRecvParams *params; /* number of created threads */ int count; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; + /* global number of generated multifd packets */ + uint64_t packet_num; } *multifd_recv_state; static void 
multifd_recv_terminate_threads(Error *err) @@ -781,8 +1140,11 @@ static void multifd_recv_terminate_threads(Error *err) MultiFDRecvParams *p = &multifd_recv_state->params[i]; qemu_mutex_lock(&p->mutex); - p->quit = true; - qemu_sem_post(&p->sem); + /* We could arrive here for two reasons: + - normal quit, i.e. everything went fine, just finished + - error quit: We close the channels so the channel threads + finish the qio_channel_read_all_eof() */ + qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); qemu_mutex_unlock(&p->mutex); } } @@ -805,10 +1167,16 @@ int multifd_load_cleanup(Error **errp) object_unref(OBJECT(p->c)); p->c = NULL; qemu_mutex_destroy(&p->mutex); - qemu_sem_destroy(&p->sem); + qemu_sem_destroy(&p->sem_sync); g_free(p->name); p->name = NULL; + multifd_pages_clear(p->pages); + p->pages = NULL; + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; } + qemu_sem_destroy(&multifd_recv_state->sem_sync); g_free(multifd_recv_state->params); multifd_recv_state->params = NULL; g_free(multifd_recv_state); @@ -817,30 +1185,95 @@ int multifd_load_cleanup(Error **errp) return ret; } +static void multifd_recv_sync_main(void) +{ + int i; + + if (!migrate_use_multifd()) { + return; + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + + trace_multifd_recv_sync_main_wait(p->id); + qemu_sem_wait(&multifd_recv_state->sem_sync); + qemu_mutex_lock(&p->mutex); + if (multifd_recv_state->packet_num < p->packet_num) { + multifd_recv_state->packet_num = p->packet_num; + } + qemu_mutex_unlock(&p->mutex); + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + + trace_multifd_recv_sync_main_signal(p->id); + qemu_sem_post(&p->sem_sync); + } + trace_multifd_recv_sync_main(multifd_recv_state->packet_num); +} + static void *multifd_recv_thread(void *opaque) { MultiFDRecvParams *p = opaque; + Error *local_err = NULL; + int ret; + + trace_multifd_recv_thread_start(p->id); while (true) { + uint32_t used; + uint32_t flags; + + ret = qio_channel_read_all_eof(p->c, (void *)p->packet, + p->packet_len, &local_err); + if (ret == 0) { /* EOF */ + break; + } + if (ret == -1) { /* Error */ + break; + } + qemu_mutex_lock(&p->mutex); - if (p->quit) { + ret = multifd_recv_unfill_packet(p, &local_err); + if (ret) { qemu_mutex_unlock(&p->mutex); break; } + + used = p->pages->used; + flags = p->flags; + trace_multifd_recv(p->id, p->packet_num, used, flags); + p->num_packets++; + p->num_pages += used; qemu_mutex_unlock(&p->mutex); - qemu_sem_wait(&p->sem); + + ret = qio_channel_readv_all(p->c, p->pages->iov, used, &local_err); + if (ret != 0) { + break; + } + + if (flags & MULTIFD_FLAG_SYNC) { + qemu_sem_post(&multifd_recv_state->sem_sync); + qemu_sem_wait(&p->sem_sync); + } } + if (local_err) { + multifd_recv_terminate_threads(local_err); + } qemu_mutex_lock(&p->mutex); p->running = false; qemu_mutex_unlock(&p->mutex); + trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages); + return NULL; } int multifd_load_setup(void) { int thread_count; + uint32_t page_count = migrate_multifd_page_count(); uint8_t i; if (!migrate_use_multifd()) { @@ -850,13 +1283,18 @@ int multifd_load_setup(void) multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state)); multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); atomic_set(&multifd_recv_state->count, 0); + qemu_sem_init(&multifd_recv_state->sem_sync, 0); + for (i = 0; i < thread_count; i++) { MultiFDRecvParams *p = 
&multifd_recv_state->params[i]; qemu_mutex_init(&p->mutex); - qemu_sem_init(&p->sem, 0); - p->quit = false; + qemu_sem_init(&p->sem_sync, 0); p->id = i; + p->pages = multifd_pages_init(page_count); + p->packet_len = sizeof(MultiFDPacket_t) + + sizeof(ram_addr_t) * page_count; + p->packet = g_malloc0(p->packet_len); p->name = g_strdup_printf("multifdrecv_%d", i); } return 0; @@ -894,6 +1332,8 @@ void multifd_recv_new_channel(QIOChannel *ioc) } p->c = ioc; object_ref(OBJECT(ioc)); + /* initial packet */ + p->num_packets = 1; p->running = true; qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, @@ -1374,6 +1814,15 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) return pages; } +static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, + ram_addr_t offset) +{ + multifd_queue_page(block, offset); + ram_counters.normal++; + + return 1; +} + static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, ram_addr_t offset, uint8_t *source_buf) { @@ -1779,6 +2228,8 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, */ if (block == rs->last_sent_block && save_page_use_compression(rs)) { return compress_page_with_multi_thread(rs, block, offset); + } else if (migrate_use_multifd()) { + return ram_save_multifd_page(rs, block, offset); } return ram_save_page(rs, pss, last_stage); @@ -2605,7 +3056,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque) ram_control_before_iterate(f, RAM_CONTROL_SETUP); ram_control_after_iterate(f, RAM_CONTROL_SETUP); + multifd_send_sync_main(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); return 0; } @@ -2685,8 +3138,10 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) */ ram_control_after_iterate(f, RAM_CONTROL_ROUND); + multifd_send_sync_main(); out: qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); ram_counters.transferred += 8; ret = qemu_file_get_error(f); @@ -2738,7 +3193,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque) rcu_read_unlock(); + multifd_send_sync_main(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); return 0; } @@ -3107,9 +3564,7 @@ static int ram_load_cleanup(void *opaque) */ int ram_postcopy_incoming_init(MigrationIncomingState *mis) { - unsigned long ram_pages = last_ram_page(); - - return postcopy_ram_incoming_init(mis, ram_pages); + return postcopy_ram_incoming_init(mis); } /** @@ -3227,6 +3682,7 @@ static int ram_load_postcopy(QEMUFile *f) break; case RAM_SAVE_FLAG_EOS: /* normal exit */ + multifd_recv_sync_main(); break; default: error_report("Unknown combination of migration flags: %#x" @@ -3415,6 +3871,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) break; case RAM_SAVE_FLAG_EOS: /* normal exit */ + multifd_recv_sync_main(); break; default: if (flags & RAM_SAVE_FLAG_HOOK) { diff --git a/migration/trace-events b/migration/trace-events index 3f67758893..9430f3cbe0 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -76,6 +76,18 @@ get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 migration_throttle(void) "" +multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags) "channel %d packet number %" PRIu64 " pages %d flags 0x%x" +multifd_recv_sync_main(long packet_num) "packet num %ld" +multifd_recv_sync_main_signal(uint8_t id) "channel %d" +multifd_recv_sync_main_wait(uint8_t id) "channel %d" +multifd_recv_thread_end(uint8_t id, 
uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 +multifd_recv_thread_start(uint8_t id) "%d" +multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x" +multifd_send_sync_main(long packet_num) "packet num %ld" +multifd_send_sync_main_signal(uint8_t id) "channel %d" +multifd_send_sync_main_wait(uint8_t id) "channel %d" +multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 +multifd_send_thread_start(uint8_t id) "%d" ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx" ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: addr: 0x%" PRIx64 " flags: 0x%x host: %p" ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x" @@ -133,7 +145,7 @@ migrate_global_state_post_load(const char *state) "loaded state: %s" migrate_global_state_pre_save(const char *state) "saved state: %s" migration_thread_low_pending(uint64_t pending) "%" PRIu64 migrate_state_too_big(void) "" -migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64 +migrate_transferred(uint64_t transferred, uint64_t time_spent, uint64_t bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %" PRIu64 " max_size %" PRId64 process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d" process_incoming_migration_co_postcopy_end_main(void) "" migration_set_incoming_channel(void *ioc, const char *ioctype) "ioc=%p ioctype=%s" diff --git a/qapi/migration.json b/qapi/migration.json index 1b4c1db670..186e8a7303 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -39,6 +39,8 @@ # @page-size: The number of bytes per page for the various page-based # statistics (since 2.10) # +# @multifd-bytes: The number of bytes sent through multifd (since 3.0) +# # Since: 0.14.0 ## { 'struct': 'MigrationStats', @@ -46,7 +48,8 @@ 'duplicate': 'int', 'skipped': 'int', 'normal': 'int', 'normal-bytes': 'int', 'dirty-pages-rate' : 'int', 'mbps' : 'number', 'dirty-sync-count' : 'int', - 'postcopy-requests' : 'int', 'page-size' : 'int' } } + 'postcopy-requests' : 'int', 'page-size' : 'int', + 'multifd-bytes' : 'uint64' } } ## # @XBZRLECacheStats: diff --git a/qapi/trace-events b/qapi/trace-events index 9e9008a1dc..70e049ea80 100644 --- a/qapi/trace-events +++ b/qapi/trace-events @@ -29,6 +29,6 @@ visit_type_int64(void *v, const char *name, int64_t *obj) "v=%p name=%s obj=%p" visit_type_size(void *v, const char *name, uint64_t *obj) "v=%p name=%s obj=%p" visit_type_bool(void *v, const char *name, bool *obj) "v=%p name=%s obj=%p" visit_type_str(void *v, const char *name, char **obj) "v=%p name=%s obj=%p" -visit_type_number(void *v, const char *name, double *obj) "v=%p name=%s obj=%p" +visit_type_number(void *v, const char *name, void *obj) "v=%p name=%s obj=%p" visit_type_any(void *v, const char *name, void *obj) "v=%p name=%s obj=%p" visit_type_null(void *v, const char *name, void *obj) "v=%p name=%s obj=%p" diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py index b20fac34a3..0e3c9e146c 100644 --- a/scripts/tracetool/__init__.py +++ b/scripts/tracetool/__init__.py @@ -53,8 +53,6 @@ ALLOWED_TYPES = [ "bool", "unsigned", "signed", - "float", - "double", "int8_t", "uint8_t", "int16_t", diff --git a/stubs/linux-aio.c b/stubs/linux-aio.c index
ed47bd443c..84d1f784ae 100644 --- a/stubs/linux-aio.c +++ b/stubs/linux-aio.c @@ -21,7 +21,7 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context) abort(); } -LinuxAioState *laio_init(void) +LinuxAioState *laio_init(Error **errp) { abort(); } diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 2d174f3a91..5116a8a956 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1533,7 +1533,7 @@ static int kvm_put_fpu(X86CPU *cpu) #define XSAVE_PKRU 672 #define XSAVE_BYTE_OFFSET(word_offset) \ - ((word_offset) * sizeof(((struct kvm_xsave *)0)->region[0])) + ((word_offset) * sizeof_field(struct kvm_xsave, region[0])) #define ASSERT_OFFSET(word_offset, field) \ QEMU_BUILD_BUG_ON(XSAVE_BYTE_OFFSET(word_offset) != \ diff --git a/target/mips/gdbstub.c b/target/mips/gdbstub.c index 6d1fb70f2c..18e0e6dce4 100644 --- a/target/mips/gdbstub.c +++ b/target/mips/gdbstub.c @@ -39,7 +39,7 @@ int mips_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) return gdb_get_regl(mem_buf, (int32_t)env->active_fpu.fcr0); default: if (env->CP0_Status & (1 << CP0St_FR)) { - return gdb_get_regl(mem_buf, + return gdb_get_reg64(mem_buf, env->active_fpu.fpr[n - 38].d); } else { return gdb_get_regl(mem_buf, @@ -100,6 +100,7 @@ int mips_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) break; default: if (env->CP0_Status & (1 << CP0St_FR)) { + uint64_t tmp = ldq_p(mem_buf); env->active_fpu.fpr[n - 38].d = tmp; } else { env->active_fpu.fpr[n - 38].w[FP_ENDIAN_IDX] = tmp; diff --git a/target/mips/op_helper.c b/target/mips/op_helper.c index 9025f42366..41d3634289 100644 --- a/target/mips/op_helper.c +++ b/target/mips/op_helper.c @@ -2627,6 +2627,9 @@ void helper_ctc1(CPUMIPSState *env, target_ulong arg1, uint32_t fs, uint32_t rt) (env->active_fpu.fcr31 & ~(env->active_fpu.fcr31_rw_bitmask)); break; default: + if (env->insn_flags & ISA_MIPS32R6) { + do_raise_exception(env, EXCP_RI, GETPC()); + } return; } restore_fp_status(env); diff --git a/target/mips/translate.c b/target/mips/translate.c index e57d71e485..20b43c0337 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -2112,7 +2112,7 @@ OP_ST_ATOMIC(scd,st64,ld64,0x7); #undef OP_ST_ATOMIC static void gen_base_offset_addr (DisasContext *ctx, TCGv addr, - int base, int16_t offset) + int base, int offset) { if (base == 0) { tcg_gen_movi_tl(addr, offset); @@ -2140,7 +2140,7 @@ static target_ulong pc_relative_pc (DisasContext *ctx) /* Load */ static void gen_ld(DisasContext *ctx, uint32_t opc, - int rt, int base, int16_t offset) + int rt, int base, int offset) { TCGv t0, t1, t2; int mem_idx = ctx->mem_idx; @@ -2337,7 +2337,7 @@ static void gen_ld(DisasContext *ctx, uint32_t opc, /* Store */ static void gen_st (DisasContext *ctx, uint32_t opc, int rt, - int base, int16_t offset) + int base, int offset) { TCGv t0 = tcg_temp_new(); TCGv t1 = tcg_temp_new(); @@ -2433,11 +2433,8 @@ static void gen_st_cond (DisasContext *ctx, uint32_t opc, int rt, /* Load and store */ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft, - int base, int16_t offset) + TCGv t0) { - TCGv t0 = tcg_temp_new(); - - gen_base_offset_addr(ctx, t0, base, offset); /* Don't do NOP if destination is zero: we must perform the actual memory access. 
*/ switch (opc) { @@ -2480,15 +2477,15 @@ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft, default: MIPS_INVAL("flt_ldst"); generate_exception_end(ctx, EXCP_RI); - goto out; + break; } - out: - tcg_temp_free(t0); } static void gen_cop1_ldst(DisasContext *ctx, uint32_t op, int rt, int rs, int16_t imm) { + TCGv t0 = tcg_temp_new(); + if (ctx->CP0_Config1 & (1 << CP0C1_FP)) { check_cp1_enabled(ctx); switch (op) { @@ -2497,16 +2494,18 @@ static void gen_cop1_ldst(DisasContext *ctx, uint32_t op, int rt, check_insn(ctx, ISA_MIPS2); /* Fallthrough */ default: - gen_flt_ldst(ctx, op, rt, rs, imm); + gen_base_offset_addr(ctx, t0, rs, imm); + gen_flt_ldst(ctx, op, rt, t0); } } else { generate_exception_err(ctx, EXCP_CpU, 1); } + tcg_temp_free(t0); } /* Arithmetic with immediate operand */ static void gen_arith_imm(DisasContext *ctx, uint32_t opc, - int rt, int rs, int16_t imm) + int rt, int rs, int imm) { target_ulong uimm = (target_long)imm; /* Sign extend to 32/64 bits */ @@ -20713,6 +20712,11 @@ void cpu_state_reset(CPUMIPSState *env) env->CP0_Status |= (1 << CP0St_FR); } + if (env->CP0_Config3 & (1 << CP0C3_ISA)) { + /* microMIPS on reset when Config3.ISA == {1, 3} */ + env->hflags |= MIPS_HFLAG_M16; + } + /* MSA */ if (env->CP0_Config3 & (1 << CP0C3_MSAP)) { msa_reset(env); diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c index 351a65b22f..cc1460e4e3 100644 --- a/target/ppc/arch_dump.c +++ b/target/ppc/arch_dump.c @@ -210,11 +210,11 @@ static const struct NoteFuncDescStruct { int contents_size; void (*note_contents_func)(NoteFuncArg *arg, PowerPCCPU *cpu); } note_func[] = { - {sizeof(((Note *)0)->contents.prstatus), ppc_write_elf_prstatus}, - {sizeof(((Note *)0)->contents.fpregset), ppc_write_elf_fpregset}, - {sizeof(((Note *)0)->contents.vmxregset), ppc_write_elf_vmxregset}, - {sizeof(((Note *)0)->contents.vsxregset), ppc_write_elf_vsxregset}, - {sizeof(((Note *)0)->contents.speregset), ppc_write_elf_speregset}, + {sizeof_field(Note, contents.prstatus), ppc_write_elf_prstatus}, + {sizeof_field(Note, contents.fpregset), ppc_write_elf_fpregset}, + {sizeof_field(Note, contents.vmxregset), ppc_write_elf_vmxregset}, + {sizeof_field(Note, contents.vsxregset), ppc_write_elf_vsxregset}, + {sizeof_field(Note, contents.speregset), ppc_write_elf_speregset}, { 0, NULL} }; diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c index 6f61ff95af..c9ef0a6e60 100644 --- a/target/s390x/arch_dump.c +++ b/target/s390x/arch_dump.c @@ -184,20 +184,20 @@ typedef struct NoteFuncDescStruct { } NoteFuncDesc; static const NoteFuncDesc note_core[] = { - {sizeof(((Note *)0)->contents.prstatus), s390x_write_elf64_prstatus}, - {sizeof(((Note *)0)->contents.fpregset), s390x_write_elf64_fpregset}, + {sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus}, + {sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset}, { 0, NULL} }; static const NoteFuncDesc note_linux[] = { - {sizeof(((Note *)0)->contents.prefix), s390x_write_elf64_prefix}, - {sizeof(((Note *)0)->contents.ctrs), s390x_write_elf64_ctrs}, - {sizeof(((Note *)0)->contents.timer), s390x_write_elf64_timer}, - {sizeof(((Note *)0)->contents.todcmp), s390x_write_elf64_todcmp}, - {sizeof(((Note *)0)->contents.todpreg), s390x_write_elf64_todpreg}, - {sizeof(((Note *)0)->contents.vregslo), s390x_write_elf64_vregslo}, - {sizeof(((Note *)0)->contents.vregshi), s390x_write_elf64_vregshi}, - {sizeof(((Note *)0)->contents.gscb), s390x_write_elf64_gscb}, + {sizeof_field(Note, contents.prefix), 
s390x_write_elf64_prefix}, + {sizeof_field(Note, contents.ctrs), s390x_write_elf64_ctrs}, + {sizeof_field(Note, contents.timer), s390x_write_elf64_timer}, + {sizeof_field(Note, contents.todcmp), s390x_write_elf64_todcmp}, + {sizeof_field(Note, contents.todpreg), s390x_write_elf64_todpreg}, + {sizeof_field(Note, contents.vregslo), s390x_write_elf64_vregslo}, + {sizeof_field(Note, contents.vregshi), s390x_write_elf64_vregshi}, + {sizeof_field(Note, contents.gscb), s390x_write_elf64_gscb}, { 0, NULL} }; diff --git a/trace/mem-internal.h b/trace/mem-internal.h index ddda934253..f6efaf6d6b 100644 --- a/trace/mem-internal.h +++ b/trace/mem-internal.h @@ -10,37 +10,45 @@ #ifndef TRACE__MEM_INTERNAL_H #define TRACE__MEM_INTERNAL_H -static inline uint8_t trace_mem_get_info(TCGMemOp op, bool store) +#define TRACE_MEM_SZ_SHIFT_MASK 0x7 /* size shift mask */ +#define TRACE_MEM_SE (1ULL << 3) /* sign extended (y/n) */ +#define TRACE_MEM_BE (1ULL << 4) /* big endian (y/n) */ +#define TRACE_MEM_ST (1ULL << 5) /* store (y/n) */ + +static inline uint8_t trace_mem_build_info( + int size_shift, bool sign_extend, TCGMemOp endianness, bool store) { - uint8_t res = op; - bool be = (op & MO_BSWAP) == MO_BE; - - /* remove untraced fields */ - res &= (1ULL << 4) - 1; - /* make endianness absolute */ - res &= ~MO_BSWAP; - if (be) { - res |= 1ULL << 3; + uint8_t res; + + res = size_shift & TRACE_MEM_SZ_SHIFT_MASK; + if (sign_extend) { + res |= TRACE_MEM_SE; + } + if (endianness == MO_BE) { + res |= TRACE_MEM_BE; } - /* add fields */ if (store) { - res |= 1ULL << 4; + res |= TRACE_MEM_ST; } - return res; } -static inline uint8_t trace_mem_build_info( - TCGMemOp size, bool sign_extend, TCGMemOp endianness, bool store) +static inline uint8_t trace_mem_get_info(TCGMemOp op, bool store) { - uint8_t res = 0; - res |= size; - res |= (sign_extend << 2); - if (endianness == MO_BE) { - res |= (1ULL << 3); - } - res |= (store << 4); - return res; + return trace_mem_build_info(op & MO_SIZE, !!(op & MO_SIGN), + op & MO_BSWAP, store); +} + +static inline +uint8_t trace_mem_build_info_no_se_be(int size_shift, bool store) +{ + return trace_mem_build_info(size_shift, false, MO_BE, store); +} + +static inline +uint8_t trace_mem_build_info_no_se_le(int size_shift, bool store) +{ + return trace_mem_build_info(size_shift, false, MO_LE, store); } #endif /* TRACE__MEM_INTERNAL_H */ diff --git a/trace/mem.h b/trace/mem.h index 9c88bcb4e6..2b58196e53 100644 --- a/trace/mem.h +++ b/trace/mem.h @@ -25,7 +25,7 @@ static uint8_t trace_mem_get_info(TCGMemOp op, bool store); * * Return a value for the 'info' argument in guest memory access traces. */ -static uint8_t trace_mem_build_info(TCGMemOp size, bool sign_extend, +static uint8_t trace_mem_build_info(int size_shift, bool sign_extend, TCGMemOp endianness, bool store); diff --git a/util/async.c b/util/async.c index 03f62787f2..05979f8014 100644 --- a/util/async.c +++ b/util/async.c @@ -323,14 +323,22 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx) } #ifdef CONFIG_LINUX_AIO -LinuxAioState *aio_get_linux_aio(AioContext *ctx) +LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp) { if (!ctx->linux_aio) { - ctx->linux_aio = laio_init(); - laio_attach_aio_context(ctx->linux_aio, ctx); + ctx->linux_aio = laio_init(errp); + if (ctx->linux_aio) { + laio_attach_aio_context(ctx->linux_aio, ctx); + } } return ctx->linux_aio; } + +LinuxAioState *aio_get_linux_aio(AioContext *ctx) +{ + assert(ctx->linux_aio); + return ctx->linux_aio; +} #endif void aio_notify(AioContext *ctx) |
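The rewritten trace/mem-internal.h at the end of this diff pins down the layout of the one-byte 'info' value passed to guest memory tracing: bits 0-2 hold log2 of the access size, bit 3 marks sign extension, bit 4 big endian, and bit 5 store. A standalone sketch of composing and decoding that byte, assuming exactly the TRACE_MEM_* definitions above (build_info() is an illustrative stand-in for trace_mem_build_info(), whose real signature takes a TCGMemOp for endianness):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TRACE_MEM_SZ_SHIFT_MASK 0x7  /* bits 0-2: log2(size in bytes) */
    #define TRACE_MEM_SE (1 << 3)        /* sign extended (y/n) */
    #define TRACE_MEM_BE (1 << 4)        /* big endian (y/n) */
    #define TRACE_MEM_ST (1 << 5)        /* store (y/n) */

    static uint8_t build_info(int size_shift, bool se, bool be, bool st)
    {
        uint8_t info = size_shift & TRACE_MEM_SZ_SHIFT_MASK;

        if (se) {
            info |= TRACE_MEM_SE;
        }
        if (be) {
            info |= TRACE_MEM_BE;
        }
        if (st) {
            info |= TRACE_MEM_ST;
        }
        return info;
    }

    int main(void)
    {
        /* a 4-byte big-endian store, as an atomic helper with
         * SHIFT == 2 would trace it */
        uint8_t info = build_info(2, false, true, true);

        printf("%d-byte %s-endian %s%s\n",
               1 << (info & TRACE_MEM_SZ_SHIFT_MASK),
               info & TRACE_MEM_BE ? "big" : "little",
               info & TRACE_MEM_ST ? "store" : "load",
               info & TRACE_MEM_SE ? ", sign-extended" : "");
        return 0;
    }

This encoding is why the templates now define and pass SHIFT (0-3) rather than DATA_SIZE: the old builder did a bare res |= size with the sign-extend flag at bit 2, so passing a raw byte count of 4 or 8, as cpu_ldst_useronly_template.h previously did, leaked into the flag bits.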
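In the new multifd protocol (migration/ram.c above), every multi-byte field of the MultiFDPacket_t header is converted to big-endian by multifd_send_fill_packet() and checked field by field by multifd_recv_unfill_packet() before any page data is trusted, starting with magic and version. A minimal endian-safe sketch of that handshake; put_be32() and get_be32() are illustrative stand-ins for QEMU's cpu_to_be32()/be32_to_cpu():

    #include <stdint.h>
    #include <stdio.h>

    #define MULTIFD_MAGIC   0x11223344u
    #define MULTIFD_VERSION 1u

    /* write/read a 32-bit value in big-endian byte order,
     * independent of host endianness */
    static void put_be32(uint8_t *p, uint32_t v)
    {
        p[0] = v >> 24;
        p[1] = v >> 16;
        p[2] = v >> 8;
        p[3] = v;
    }

    static uint32_t get_be32(const uint8_t *p)
    {
        return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
               (uint32_t)p[2] << 8 | p[3];
    }

    int main(void)
    {
        uint8_t hdr[8];

        /* sender: header fields always travel big-endian */
        put_be32(hdr, MULTIFD_MAGIC);
        put_be32(hdr + 4, MULTIFD_VERSION);

        /* receiver: validate before interpreting anything else */
        if (get_be32(hdr) != MULTIFD_MAGIC ||
            get_be32(hdr + 4) != MULTIFD_VERSION) {
            fprintf(stderr, "multifd: bad packet header\n");
            return 1;
        }
        printf("multifd header ok\n");
        return 0;
    }

The same ordering discipline shows up in multifd_recv_unfill_packet(): size is validated against migrate_multifd_page_count() before used, used before the offsets, and the RAMBlock name is NUL-terminated before the lookup.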