-rw-r--r-- | arch_init.c                   | 177
-rw-r--r-- | hmp-commands.hx               |  16
-rw-r--r-- | hmp.c                         |  14
-rw-r--r-- | hmp.h                         |   1
-rw-r--r-- | hw/core/machine.c             |  20
-rw-r--r-- | hw/i386/pc_piix.c             |   1
-rw-r--r-- | hw/i386/pc_q35.c              |   1
-rw-r--r-- | hw/intc/arm_gic_kvm.c         |   7
-rw-r--r-- | include/hw/boards.h           |   1
-rw-r--r-- | include/migration/migration.h |   2
-rw-r--r-- | include/migration/qemu-file.h |   2
-rw-r--r-- | linux-user/arm/target_cpu.h   |  15
-rw-r--r-- | linux-user/main.c             |   2
-rw-r--r-- | migration/migration.c         |  48
-rw-r--r-- | migration/qemu-file.c         |   3
-rw-r--r-- | migration/rdma.c              |   2
-rw-r--r-- | qapi-schema.json              |  15
-rw-r--r-- | qemu-options.hx               |   3
-rw-r--r-- | qmp-commands.hx               |  31
-rw-r--r-- | savevm.c                      |  36
-rw-r--r-- | target-arm/helper.c           | 222
-rw-r--r-- | target-arm/translate.c        |  18
-rw-r--r-- | tcg/optimize.c                |   5
23 files changed, 491 insertions, 151 deletions
diff --git a/arch_init.c b/arch_init.c
index 691b5e2e91..c3f7d3f56d 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -305,34 +305,50 @@ uint64_t xbzrle_mig_pages_overflow(void)
     return acct_info.xbzrle_overflows;
 }
 
-static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
-                             int cont, int flag)
+/* This is the last block that we have visited serching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+/**
+ * save_page_header: Write page header to wire
+ *
+ * If this is the 1st block, it also writes the block identification
+ *
+ * Returns: Number of bytes written
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ *          in the lower bits, it contains flags
+ */
+static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
 {
     size_t size;
 
-    qemu_put_be64(f, offset | cont | flag);
+    if (block == last_sent_block) {
+        offset |= RAM_SAVE_FLAG_CONTINUE;
+    }
+
+    qemu_put_be64(f, offset);
     size = 8;
 
-    if (!cont) {
+    if (block != last_sent_block) {
         qemu_put_byte(f, strlen(block->idstr));
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
         size += 1 + strlen(block->idstr);
+        last_sent_block = block;
     }
     return size;
 }
 
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
 /* Update the xbzrle cache to reflect a page that's been sent as all 0.
  * The important thing is that a stale (not-yet-0'd) page be replaced
  * by the new data.
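The new save_page_header() fixes the wire format for every RAM page sent: a big-endian 64-bit word whose low bits carry the RAM_SAVE_FLAG_* bits, optionally followed by a length-prefixed block id string. A minimal sketch of the matching reader side, assuming the usual convention that the flags sit below the page-size alignment; the helper name load_page_header is invented for illustration and is not QEMU API:

    /* Hypothetical decoder for the header written by save_page_header() */
    static void load_page_header(QEMUFile *f, char *idstr, uint64_t *offset,
                                 int *flags)
    {
        uint64_t addr = qemu_get_be64(f);

        *flags = addr & ~TARGET_PAGE_MASK;   /* low bits carry the flags */
        *offset = addr & TARGET_PAGE_MASK;   /* page-aligned offset */

        if (!(*flags & RAM_SAVE_FLAG_CONTINUE)) {
            /* A new block: its id string follows the header */
            int len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)idstr, len);
            idstr[len] = 0;
        }
        /* With RAM_SAVE_FLAG_CONTINUE set, the page belongs to the same
         * block as the previous one, so no id string is sent. */
    }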
@@ -353,11 +369,27 @@ static void xbzrle_cache_zero_page(ram_addr_t current_addr)
 
 #define ENCODING_FLAG_XBZRLE 0x1
 
+/**
+ * save_xbzrle_page: compress and send current page
+ *
+ * Returns: 1 means that we wrote the page
+ *          0 means that page is identical to the one already sent
+ *          -1 means that xbzrle would be longer than normal
+ *
+ * @f: QEMUFile where to send the data
+ * @current_data:
+ * @current_addr:
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
 static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
                             ram_addr_t current_addr, RAMBlock *block,
-                            ram_addr_t offset, int cont, bool last_stage)
+                            ram_addr_t offset, bool last_stage,
+                            uint64_t *bytes_transferred)
 {
-    int encoded_len = 0, bytes_sent = -1;
+    int encoded_len = 0, bytes_xbzrle;
     uint8_t *prev_cached_page;
 
     if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
@@ -404,15 +436,16 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
     }
 
     /* Send XBZRLE based compressed page */
-    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
+    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
     qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
     qemu_put_be16(f, encoded_len);
     qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
-    bytes_sent += encoded_len + 1 + 2;
+    bytes_xbzrle += encoded_len + 1 + 2;
     acct_info.xbzrle_pages++;
-    acct_info.xbzrle_bytes += bytes_sent;
+    acct_info.xbzrle_bytes += bytes_xbzrle;
+    *bytes_transferred += bytes_xbzrle;
 
-    return bytes_sent;
+    return 1;
 }
 
 static inline
@@ -575,55 +608,64 @@ static void migration_bitmap_sync(void)
     }
 }
 
-/*
+/**
  * ram_save_page: Send the given page to the stream
  *
- * Returns:  Number of bytes written.
+ * Returns:  Number of pages written.
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
  */
 static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
-                         bool last_stage)
+                         bool last_stage, uint64_t *bytes_transferred)
 {
-    int bytes_sent;
-    int cont;
+    int pages = -1;
+    uint64_t bytes_xmit;
     ram_addr_t current_addr;
     MemoryRegion *mr = block->mr;
     uint8_t *p;
     int ret;
     bool send_async = true;
 
-    cont = (block == last_sent_block) ? RAM_SAVE_FLAG_CONTINUE : 0;
-
     p = memory_region_get_ram_ptr(mr) + offset;
 
     /* In doubt sent page as normal */
-    bytes_sent = -1;
+    bytes_xmit = 0;
     ret = ram_control_save_page(f, block->offset,
-                           offset, TARGET_PAGE_SIZE, &bytes_sent);
+                           offset, TARGET_PAGE_SIZE, &bytes_xmit);
+
+    if (bytes_xmit) {
+        *bytes_transferred += bytes_xmit;
+        pages = 1;
+    }
 
     XBZRLE_cache_lock();
 
     current_addr = block->offset + offset;
     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
         if (ret != RAM_SAVE_CONTROL_DELAYED) {
-            if (bytes_sent > 0) {
+            if (bytes_xmit > 0) {
                 acct_info.norm_pages++;
-            } else if (bytes_sent == 0) {
+            } else if (bytes_xmit == 0) {
                 acct_info.dup_pages++;
             }
         }
     } else if (is_zero_range(p, TARGET_PAGE_SIZE)) {
         acct_info.dup_pages++;
-        bytes_sent = save_block_hdr(f, block, offset, cont,
-                                    RAM_SAVE_FLAG_COMPRESS);
+        *bytes_transferred += save_page_header(f, block,
+                                               offset | RAM_SAVE_FLAG_COMPRESS);
         qemu_put_byte(f, 0);
-        bytes_sent++;
+        *bytes_transferred += 1;
+        pages = 1;
         /* Must let xbzrle know, otherwise a previous (now 0'd) cached
          * page would be stale
          */
         xbzrle_cache_zero_page(current_addr);
     } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
-        bytes_sent = save_xbzrle_page(f, &p, current_addr, block,
-                                      offset, cont, last_stage);
+        pages = save_xbzrle_page(f, &p, current_addr, block,
+                                 offset, last_stage, bytes_transferred);
         if (!last_stage) {
             /* Can't send this cached data async, since the cache page
              * might get updated before it gets to the wire
@@ -633,37 +675,44 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
     }
 
     /* XBZRLE overflow or normal page */
-    if (bytes_sent == -1) {
-        bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
+    if (pages == -1) {
+        *bytes_transferred += save_page_header(f, block,
+                                               offset | RAM_SAVE_FLAG_PAGE);
         if (send_async) {
            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
        } else {
            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
        }
-        bytes_sent += TARGET_PAGE_SIZE;
+        *bytes_transferred += TARGET_PAGE_SIZE;
+        pages = 1;
         acct_info.norm_pages++;
     }
 
     XBZRLE_cache_unlock();
 
-    return bytes_sent;
+    return pages;
 }
 
-/*
- * ram_find_and_save_block: Finds a page to send and sends it to f
+/**
+ * ram_find_and_save_block: Finds a dirty page and sends it to f
  *
  * Called within an RCU critical section.
  *
- * Returns:  The number of bytes written.
+ * Returns:  The number of pages written
+ *           0 means no dirty pages
+ *
+ * @f: QEMUFile where to send the data
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
  */
-static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
+static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
+                                   uint64_t *bytes_transferred)
 {
     RAMBlock *block = last_seen_block;
     ram_addr_t offset = last_offset;
     bool complete_round = false;
-    int bytes_sent = 0;
+    int pages = 0;
     MemoryRegion *mr;
 
     if (!block)
@@ -685,11 +734,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
                 ram_bulk_stage = false;
             }
         } else {
-            bytes_sent = ram_save_page(f, block, offset, last_stage);
+            pages = ram_save_page(f, block, offset, last_stage,
+                                  bytes_transferred);
 
             /* if page is unmodified, continue to the next */
-            if (bytes_sent > 0) {
-                last_sent_block = block;
+            if (pages > 0) {
                 break;
             }
         }
@@ -697,7 +746,8 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
 
     last_seen_block = block;
     last_offset = offset;
-    return bytes_sent;
+
+    return pages;
 }
 
 static uint64_t bytes_transferred;
@@ -881,7 +931,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     int ret;
     int i;
     int64_t t0;
-    int total_sent = 0;
+    int pages_sent = 0;
 
     rcu_read_lock();
     if (ram_list.version != last_version) {
@@ -896,14 +946,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     i = 0;
     while ((ret = qemu_file_rate_limit(f)) == 0) {
-        int bytes_sent;
+        int pages;
 
-        bytes_sent = ram_find_and_save_block(f, false);
-        /* no more blocks to sent */
-        if (bytes_sent == 0) {
+        pages = ram_find_and_save_block(f, false, &bytes_transferred);
+        /* no more pages to sent */
+        if (pages == 0) {
             break;
         }
-        total_sent += bytes_sent;
+        pages_sent += pages;
         acct_info.iterations++;
         check_guest_throttling();
         /* we want to check in the 1st loop, just in case it was the 1st time
@@ -929,12 +979,6 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
      */
     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
 
-    bytes_transferred += total_sent;
-
-    /*
-     * Do not count these 8 bytes into total_sent, so that we can
-     * return 0 if no page had been dirtied.
-     */
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
     bytes_transferred += 8;
 
@@ -943,7 +987,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         return ret;
     }
 
-    return total_sent;
+    return pages_sent;
 }
 
 /* Called with iothread lock */
@@ -959,14 +1003,13 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
 
     /* flush all remaining blocks regardless of rate limiting */
     while (true) {
-        int bytes_sent;
+        int pages;
 
-        bytes_sent = ram_find_and_save_block(f, true);
+        pages = ram_find_and_save_block(f, true, &bytes_transferred);
         /* no more blocks to sent */
-        if (bytes_sent == 0) {
+        if (pages == 0) {
             break;
         }
-        bytes_transferred += bytes_sent;
     }
 
     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
diff --git a/hmp-commands.hx b/hmp-commands.hx
index d5022d843c..9c1e849859 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -922,6 +922,22 @@ Cancel the current VM migration.
 ETEXI
 
     {
+        .name       = "migrate_incoming",
+        .args_type  = "uri:s",
+        .params     = "uri",
+        .help       = "Continue an incoming migration from an -incoming defer",
+        .mhandler.cmd = hmp_migrate_incoming,
+    },
+
+STEXI
+@item migrate_incoming @var{uri}
+@findex migrate_incoming
+Continue an incoming migration using the @var{uri} (that has the same syntax
+as the -incoming option).
+
+ETEXI
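For reference, a typical session with the new HMP command — a sketch assuming the destination was started with -incoming defer and that tcp:0:4446 is the address you want to listen on:

    # destination started without a listen address
    $ qemu-system-x86_64 ... -incoming defer

    # later, from the destination monitor:
    (qemu) migrate_incoming tcp:0:4446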
+
+    {
         .name       = "migrate_set_cache_size",
         .args_type  = "value:o",
         .params     = "value",
diff --git a/hmp.c b/hmp.c
@@ -1116,6 +1116,20 @@ void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
 {
     qmp_migrate_cancel(NULL);
 }
 
+void hmp_migrate_incoming(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    const char *uri = qdict_get_str(qdict, "uri");
+
+    qmp_migrate_incoming(uri, &err);
+
+    if (err) {
+        monitor_printf(mon, "%s\n", error_get_pretty(err));
+        error_free(err);
+        return;
+    }
+}
+
 void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict)
 {
     double value = qdict_get_double(qdict, "value");
diff --git a/hmp.h b/hmp.h
@@ -60,6 +60,7 @@ void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict);
 void hmp_drive_mirror(Monitor *mon, const QDict *qdict);
 void hmp_drive_backup(Monitor *mon, const QDict *qdict);
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict);
+void hmp_migrate_incoming(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict);
diff --git a/hw/core/machine.c b/hw/core/machine.c
index e3a3e2ab73..cb1185ada4 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -254,6 +254,20 @@ static void machine_set_iommu(Object *obj, bool value, Error **errp)
     ms->iommu = value;
 }
 
+static void machine_set_suppress_vmdesc(Object *obj, bool value, Error **errp)
+{
+    MachineState *ms = MACHINE(obj);
+
+    ms->suppress_vmdesc = value;
+}
+
+static bool machine_get_suppress_vmdesc(Object *obj, Error **errp)
+{
+    MachineState *ms = MACHINE(obj);
+
+    return ms->suppress_vmdesc;
+}
+
 static int error_on_sysbus_device(SysBusDevice *sbdev, void *opaque)
 {
     error_report("Option '-device %s' cannot be handled by this machine",
@@ -377,6 +391,12 @@ static void machine_initfn(Object *obj)
     object_property_set_description(obj, "iommu",
                                     "Set on/off to enable/disable Intel IOMMU (VT-d)",
                                     NULL);
+    object_property_add_bool(obj, "suppress-vmdesc",
+                             machine_get_suppress_vmdesc,
+                             machine_set_suppress_vmdesc, NULL);
+    object_property_set_description(obj, "suppress-vmdesc",
+                                    "Set on to disable self-describing migration",
+                                    NULL);
 
     /* Register notifier when init is done for sysbus sanity checks */
     ms->sysbus_notifier.notify = machine_init_notify;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 8eab4ba94d..36c69d71ef 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -335,6 +335,7 @@ static void pc_compat_2_2(MachineState *machine)
                               CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_RTM, 0);
     x86_cpu_compat_set_features("Broadwell", FEAT_7_0_EBX,
                               CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_RTM, 0);
+    machine->suppress_vmdesc = true;
 }
 
 static void pc_compat_2_1(MachineState *machine)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index c0f21fe725..bc40537d55 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -314,6 +314,7 @@ static void pc_compat_2_2(MachineState *machine)
                               CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_RTM, 0);
     x86_cpu_compat_set_features("Broadwell", FEAT_7_0_EBX,
                               CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_RTM, 0);
+    machine->suppress_vmdesc = true;
 }
 
 static void pc_compat_2_1(MachineState *machine)
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index 1ad3eb0ff8..0d207508a0 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -573,6 +573,13 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
         kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, 0, &numirqs, 1);
     }
 
+    /* Tell the kernel to complete VGIC initialization now */
+    if (kvm_gic_supports_attr(s, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                              KVM_DEV_ARM_VGIC_CTRL_INIT)) {
+        kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                       KVM_DEV_ARM_VGIC_CTRL_INIT, 0, 0, 1);
+    }
+
     /* Distributor */
     memory_region_init_reservation(&s->iomem, OBJECT(s),
                                    "kvm-gic_dist", 0x1000);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index f44d6f542a..1feea2b176 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -143,6 +143,7 @@ struct MachineState {
     bool usb;
     char *firmware;
     bool iommu;
+    bool suppress_vmdesc;
 
     ram_addr_t ram_size;
     ram_addr_t maxram_size;
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 703b7d7c4b..5e16af60fd 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -169,6 +169,6 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags);
 
 size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
                              ram_addr_t offset, size_t size,
-                             int *bytes_sent);
+                             uint64_t *bytes_sent);
 
 #endif
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index a923cec2a6..94a8c978a4 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -82,7 +82,7 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
                                ram_addr_t block_offset,
                                ram_addr_t offset,
                                size_t size,
-                               int *bytes_sent);
+                               uint64_t *bytes_sent);
 
 /*
  * Stop any read or write (depending on flags) on the underlying
diff --git a/linux-user/arm/target_cpu.h b/linux-user/arm/target_cpu.h
index d8a534d7b1..6832262e39 100644
--- a/linux-user/arm/target_cpu.h
+++ b/linux-user/arm/target_cpu.h
@@ -29,7 +29,20 @@ static inline void cpu_clone_regs(CPUARMState *env, target_ulong newsp)
 
 static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
 {
-    env->cp15.tpidrro_el[0] = newtls;
+    if (access_secure_reg(env)) {
+        env->cp15.tpidruro_s = newtls;
+    } else {
+        env->cp15.tpidrro_el[0] = newtls;
+    }
+}
+
+static inline target_ulong cpu_get_tls(CPUARMState *env)
+{
+    if (access_secure_reg(env)) {
+        return env->cp15.tpidruro_s;
+    } else {
+        return env->cp15.tpidrro_el[0];
+    }
 }
 
 #endif
diff --git a/linux-user/main.c b/linux-user/main.c
index 6bd23af2ba..6e446de4dd 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -566,7 +566,7 @@ do_kernel_trap(CPUARMState *env)
         end_exclusive();
         break;
     case 0xffff0fe0: /* __kernel_get_tls */
-        env->regs[0] = env->cp15.tpidrro_el[0];
+        env->regs[0] = cpu_get_tls(env);
         break;
     case 0xffff0f60: /* __kernel_cmpxchg64 */
         arm_kernel_cmpxchg64_helper(env);
diff --git a/migration/migration.c b/migration/migration.c
index b3adbc653a..2c805f11f5 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -49,6 +49,8 @@ enum {
 static NotifierList migration_state_notifiers =
     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
+static bool deferred_incoming;
+
 /* When we add fault tolerance, we could have several migrations at once.
    For now we don't need to add dynamic creation of migration */
 
@@ -65,25 +67,40 @@ MigrationState *migrate_get_current(void)
     return &current_migration;
 }
 
+/*
+ * Called on -incoming with a defer: uri.
+ * The migration can be started later after any parameters have been
+ * changed.
+ */
+static void deferred_incoming_migration(Error **errp)
+{
+    if (deferred_incoming) {
+        error_setg(errp, "Incoming migration already deferred");
+    }
+    deferred_incoming = true;
+}
+
 void qemu_start_incoming_migration(const char *uri, Error **errp)
 {
     const char *p;
 
-    if (strstart(uri, "tcp:", &p))
+    if (!strcmp(uri, "defer")) {
+        deferred_incoming_migration(errp);
+    } else if (strstart(uri, "tcp:", &p)) {
         tcp_start_incoming_migration(p, errp);
 #ifdef CONFIG_RDMA
-    else if (strstart(uri, "rdma:", &p))
+    } else if (strstart(uri, "rdma:", &p)) {
         rdma_start_incoming_migration(p, errp);
 #endif
 #if !defined(WIN32)
-    else if (strstart(uri, "exec:", &p))
+    } else if (strstart(uri, "exec:", &p)) {
         exec_start_incoming_migration(p, errp);
-    else if (strstart(uri, "unix:", &p))
+    } else if (strstart(uri, "unix:", &p)) {
         unix_start_incoming_migration(p, errp);
-    else if (strstart(uri, "fd:", &p))
+    } else if (strstart(uri, "fd:", &p)) {
         fd_start_incoming_migration(p, errp);
 #endif
-    else {
+    } else {
         error_setg(errp, "unknown migration protocol: %s", uri);
     }
 }
@@ -415,6 +432,25 @@ void migrate_del_blocker(Error *reason)
     migration_blockers = g_slist_remove(migration_blockers, reason);
 }
 
+void qmp_migrate_incoming(const char *uri, Error **errp)
+{
+    Error *local_err = NULL;
+
+    if (!deferred_incoming) {
+        error_setg(errp, "'-incoming defer' is required for migrate_incoming");
+        return;
+    }
+
+    qemu_start_incoming_migration(uri, &local_err);
+
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    deferred_incoming = false;
+}
+
 void qmp_migrate(const char *uri, bool has_blk, bool blk,
                  bool has_inc, bool inc, bool has_detach, bool detach,
                  Error **errp)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index e66e55712f..1a4f9868ed 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -161,7 +161,8 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags)
 }
 
 size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                             ram_addr_t offset, size_t size, int *bytes_sent)
+                             ram_addr_t offset, size_t size,
+                             uint64_t *bytes_sent)
 {
     if (f->ops->save_page) {
         int ret = f->ops->save_page(f, f->opaque, block_offset,
diff --git a/migration/rdma.c b/migration/rdma.c
index 42d443cc64..d1c19ffdac 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2654,7 +2654,7 @@ static int qemu_rdma_close(void *opaque)
  */
 static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
                                   ram_addr_t block_offset, ram_addr_t offset,
-                                  size_t size, int *bytes_sent)
+                                  size_t size, uint64_t *bytes_sent)
 {
     QEMUFileRDMA *rfile = opaque;
     RDMAContext *rdma = rfile->rdma;
diff --git a/qapi-schema.json b/qapi-schema.json
index 8141f71fb3..2b3e275983 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1738,6 +1738,21 @@
 { 'command': 'migrate',
   'data': {'uri': 'str', '*blk': 'bool', '*inc': 'bool', '*detach': 'bool' } }
 
+##
+# @migrate-incoming
+#
+# Start an incoming migration, the qemu must have been started
+# with -incoming defer
+#
+# @uri: The Uniform Resource Identifier identifying the source or
+#       address to listen on
+#
+# Returns: nothing on success
+#
+# Since: 2.3
+##
+{ 'command': 'migrate-incoming', 'data': {'uri': 'str' } }
+
 # @xen-save-devices-state:
 #
 # Save the state of all devices to file.  The RAM and the block devices
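Putting deferred_incoming and qmp_migrate_incoming together, the intended workflow looks like this (a sketch; the port number is arbitrary, and migrate-set-capabilities is just one example of something you might want to run before the channel exists):

    # 1. Start the destination with no listen address:
    $ qemu-system-x86_64 ... -incoming defer

    # 2. Set capabilities/parameters that must be in place before
    #    the channel is created, e.g.:
    { "execute": "migrate-set-capabilities", ... }

    # 3. Only then choose the URI, which ends the deferral:
    { "execute": "migrate-incoming", "arguments": { "uri": "tcp::4446" } }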
diff --git a/qemu-options.hx b/qemu-options.hx
index ad07ddecd0..c513352ea8 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -39,7 +39,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
     "                mem-merge=on|off controls memory merge support (default: on)\n"
     "                iommu=on|off controls emulated Intel IOMMU (VT-d) support (default=off)\n"
     "                aes-key-wrap=on|off controls support for AES key wrapping (default=on)\n"
-    "                dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n",
+    "                dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n"
+    "                suppress-vmdesc=on|off disables self-describing migration (default=off)\n",
     QEMU_ARCH_ALL)
 STEXI
 @item -machine [type=]@var{name}[,prop=@var{value}[,...]]
diff --git a/qmp-commands.hx b/qmp-commands.hx
index c12334a370..06639248f2 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -661,7 +661,36 @@ Example:
 <- { "return": {} }
 
 EQMP
-{
+
+    {
+        .name       = "migrate-incoming",
+        .args_type  = "uri:s",
+        .mhandler.cmd_new = qmp_marshal_input_migrate_incoming,
+    },
+
+SQMP
+migrate-incoming
+----------------
+
+Continue an incoming migration
+
+Arguments:
+
+- "uri": Source/listening URI (json-string)
+
+Example:
+
+-> { "execute": "migrate-incoming", "arguments": { "uri": "tcp::4446" } }
+<- { "return": {} }
+
+Notes:
+
+(1) QEMU must be started with -incoming defer to allow migrate-incoming to
+    be used
+(2) The uri format is the same as to -incoming
+
+EQMP
+    {
         .name       = "migrate-set-cache-size",
         .args_type  = "value:o",
         .mhandler.cmd_new = qmp_marshal_input_migrate_set_cache_size,
diff --git a/savevm.c b/savevm.c
@@ -710,6 +710,12 @@ int qemu_savevm_state_iterate(QEMUFile *f)
     return ret;
 }
 
+static bool should_send_vmdesc(void)
+{
+    MachineState *machine = MACHINE(qdev_get_machine());
+    return !machine->suppress_vmdesc;
+}
+
 void qemu_savevm_state_complete(QEMUFile *f)
 {
     QJSON *vmdesc;
@@ -782,9 +788,11 @@ void qemu_savevm_state_complete(QEMUFile *f)
     qjson_finish(vmdesc);
     vmdesc_len = strlen(qjson_get_str(vmdesc));
 
-    qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
-    qemu_put_be32(f, vmdesc_len);
-    qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
+    if (should_send_vmdesc()) {
+        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
+        qemu_put_be32(f, vmdesc_len);
+        qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
+    }
     object_unref(OBJECT(vmdesc));
 
     qemu_fflush(f);
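should_send_vmdesc() keys off the new machine property, so the trailing JSON device description can be dropped either explicitly or via the 2.2 compat hooks shown above. A sketch of both ways to set it (the machine names are only examples):

    # explicit, on any machine type:
    $ qemu-system-x86_64 -machine pc,suppress-vmdesc=on ...

    # implicit: pc_compat_2_2() sets machine->suppress_vmdesc = true,
    # so pre-2.3 machine types never emit the section:
    $ qemu-system-x86_64 -M pc-i440fx-2.2 ...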
@@ -930,6 +938,7 @@ int qemu_loadvm_state(QEMUFile *f)
     uint8_t section_type;
     unsigned int v;
     int ret;
+    int file_error_after_eof = -1;
 
     if (qemu_savevm_state_blocked(&local_err)) {
         error_report("%s", error_get_pretty(local_err));
@@ -1035,6 +1044,24 @@ int qemu_loadvm_state(QEMUFile *f)
         }
     }
 
+    file_error_after_eof = qemu_file_get_error(f);
+
+    /*
+     * Try to read in the VMDESC section as well, so that dumping tools that
+     * intercept our migration stream have the chance to see it.
+     */
+    if (qemu_get_byte(f) == QEMU_VM_VMDESCRIPTION) {
+        uint32_t size = qemu_get_be32(f);
+        uint8_t *buf = g_malloc(0x1000);
+
+        while (size > 0) {
+            uint32_t read_chunk = MIN(size, 0x1000);
+            qemu_get_buffer(f, buf, read_chunk);
+            size -= read_chunk;
+        }
+        g_free(buf);
+    }
+
     cpu_synchronize_all_post_init();
 
     ret = 0;
@@ -1046,7 +1073,8 @@ out:
     }
 
     if (ret == 0) {
-        ret = qemu_file_get_error(f);
+        /* We may not have a VMDESC section, so ignore relative errors */
+        ret = file_error_after_eof;
     }
 
     return ret;
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 3bc20af04f..10886c5281 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -4334,6 +4334,16 @@ static void do_v7m_exception_exit(CPUARMState *env)
     env->regs[12] = v7m_pop(env);
     env->regs[14] = v7m_pop(env);
     env->regs[15] = v7m_pop(env);
+    if (env->regs[15] & 1) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "M profile return from interrupt with misaligned "
+                      "PC is UNPREDICTABLE\n");
+        /* Actual hardware seems to ignore the lsbit, and there are several
+         * RTOSes out there which incorrectly assume the r15 in the stack
+         * frame should be a Thumb-style "lsbit indicates ARM/Thumb" value.
+         */
+        env->regs[15] &= ~1U;
+    }
     xpsr = v7m_pop(env);
     xpsr_write(env, xpsr, 0xfffffdff);
     /* Undo stack alignment.  */
@@ -4903,34 +4913,28 @@ static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
     }
 }
 
-/* Check section/page access permissions.
-   Returns the page protection flags, or zero if the access is not
-   permitted.  */
-static inline int check_ap(CPUARMState *env, ARMMMUIdx mmu_idx,
-                           int ap, int domain_prot,
-                           int access_type)
+/* Translate section/page access permissions to page
+ * R/W protection flags
+ *
+ * @env:         CPUARMState
+ * @mmu_idx:     MMU index indicating required translation regime
+ * @ap:          The 3-bit access permissions (AP[2:0])
+ * @domain_prot: The 2-bit domain access permissions
+ */
+static inline int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx,
+                                int ap, int domain_prot)
 {
-    int prot_ro;
     bool is_user = regime_is_user(env, mmu_idx);
 
     if (domain_prot == 3) {
         return PAGE_READ | PAGE_WRITE;
     }
 
-    if (access_type == 1) {
-        prot_ro = 0;
-    } else {
-        prot_ro = PAGE_READ;
-    }
-
     switch (ap) {
     case 0:
         if (arm_feature(env, ARM_FEATURE_V7)) {
             return 0;
         }
-        if (access_type == 1) {
-            return 0;
-        }
         switch (regime_sctlr(env, mmu_idx) & (SCTLR_S | SCTLR_R)) {
         case SCTLR_S:
             return is_user ? 0 : PAGE_READ;
@@ -4943,7 +4947,7 @@ static inline int check_ap(CPUARMState *env, ARMMMUIdx mmu_idx,
         return is_user ? 0 : PAGE_READ | PAGE_WRITE;
     case 2:
         if (is_user) {
-            return prot_ro;
+            return PAGE_READ;
         } else {
             return PAGE_READ | PAGE_WRITE;
         }
@@ -4952,17 +4956,126 @@ static inline int check_ap(CPUARMState *env, ARMMMUIdx mmu_idx,
     case 4: /* Reserved.  */
         return 0;
     case 5:
-        return is_user ? 0 : prot_ro;
+        return is_user ? 0 : PAGE_READ;
     case 6:
-        return prot_ro;
+        return PAGE_READ;
     case 7:
         if (!arm_feature(env, ARM_FEATURE_V6K)) {
             return 0;
         }
-        return prot_ro;
+        return PAGE_READ;
     default:
-        abort();
+        g_assert_not_reached();
+    }
+}
+
+/* Translate section/page access permissions to page
+ * R/W protection flags.
+ *
+ * @ap:      The 2-bit simple AP (AP[2:1])
+ * @is_user: TRUE if accessing from PL0
+ */
+static inline int simple_ap_to_rw_prot_is_user(int ap, bool is_user)
+{
+    switch (ap) {
+    case 0:
+        return is_user ? 0 : PAGE_READ | PAGE_WRITE;
+    case 1:
+        return PAGE_READ | PAGE_WRITE;
+    case 2:
+        return is_user ? 0 : PAGE_READ;
+    case 3:
+        return PAGE_READ;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static inline int
+simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
+{
+    return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
+}
+
+/* Translate section/page access permissions to protection flags
+ *
+ * @env:     CPUARMState
+ * @mmu_idx: MMU index indicating required translation regime
+ * @is_aa64: TRUE if AArch64
+ * @ap:      The 2-bit simple AP (AP[2:1])
+ * @ns:      NS (non-secure) bit
+ * @xn:      XN (execute-never) bit
+ * @pxn:     PXN (privileged execute-never) bit
+ */
+static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
+                      int ap, int ns, int xn, int pxn)
+{
+    bool is_user = regime_is_user(env, mmu_idx);
+    int prot_rw, user_rw;
+    bool have_wxn;
+    int wxn = 0;
+
+    assert(mmu_idx != ARMMMUIdx_S2NS);
+
+    user_rw = simple_ap_to_rw_prot_is_user(ap, true);
+    if (is_user) {
+        prot_rw = user_rw;
+    } else {
+        prot_rw = simple_ap_to_rw_prot_is_user(ap, false);
+    }
+
+    if (ns && arm_is_secure(env) && (env->cp15.scr_el3 & SCR_SIF)) {
+        return prot_rw;
+    }
+
+    /* TODO have_wxn should be replaced with
+     *   ARM_FEATURE_V8 || (ARM_FEATURE_V7 && ARM_FEATURE_EL2)
+     * when ARM_FEATURE_EL2 starts getting set. For now we assume all LPAE
+     * compatible processors have EL2, which is required for [U]WXN.
+     */
+    have_wxn = arm_feature(env, ARM_FEATURE_LPAE);
+
+    if (have_wxn) {
+        wxn = regime_sctlr(env, mmu_idx) & SCTLR_WXN;
+    }
+
+    if (is_aa64) {
+        switch (regime_el(env, mmu_idx)) {
+        case 1:
+            if (!is_user) {
+                xn = pxn || (user_rw & PAGE_WRITE);
+            }
+            break;
+        case 2:
+        case 3:
+            break;
+        }
+    } else if (arm_feature(env, ARM_FEATURE_V7)) {
+        switch (regime_el(env, mmu_idx)) {
+        case 1:
+        case 3:
+            if (is_user) {
+                xn = xn || !(user_rw & PAGE_READ);
+            } else {
+                int uwxn = 0;
+                if (have_wxn) {
+                    uwxn = regime_sctlr(env, mmu_idx) & SCTLR_UWXN;
+                }
+                xn = xn || !(prot_rw & PAGE_READ) || pxn ||
+                     (uwxn && (user_rw & PAGE_WRITE));
+            }
+            break;
+        case 2:
+            break;
+        }
+    } else {
+        xn = wxn = 0;
+    }
+
+    if (xn || (wxn && (prot_rw & PAGE_WRITE))) {
+        return prot_rw;
+    }
+    return prot_rw | PAGE_EXEC;
+}
 
 static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
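As a quick cross-check of the new decoder, these assertions spell out what simple_ap_to_rw_prot_is_user() returns for each simple-AP encoding; they are derived directly from the switch above, not from an independent source:

    /* Sanity checks derived from simple_ap_to_rw_prot_is_user() above */
    assert(simple_ap_to_rw_prot_is_user(0, true)  == 0);                        /* PL0: none */
    assert(simple_ap_to_rw_prot_is_user(0, false) == (PAGE_READ | PAGE_WRITE)); /* PL1: RW   */
    assert(simple_ap_to_rw_prot_is_user(1, true)  == (PAGE_READ | PAGE_WRITE)); /* both: RW  */
    assert(simple_ap_to_rw_prot_is_user(2, true)  == 0);                        /* PL0: none */
    assert(simple_ap_to_rw_prot_is_user(2, false) == PAGE_READ);                /* PL1: RO   */
    assert(simple_ap_to_rw_prot_is_user(3, true)  == PAGE_READ);                /* both: RO  */

get_S1prot() then builds on this R/W result, folding in the XN/PXN bits and the (U)WXN controls to decide whether PAGE_EXEC may be added.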
@@ -5083,12 +5196,12 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type,
         }
         code = 15;
     }
-    *prot = check_ap(env, mmu_idx, ap, domain_prot, access_type);
-    if (!*prot) {
+    *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot);
+    *prot |= *prot ? PAGE_EXEC : 0;
+    if (!(*prot & (1 << access_type))) {
         /* Access permission fault.  */
         goto do_fault;
     }
-    *prot |= PAGE_EXEC;
     *phys_ptr = phys_addr;
     return 0;
 do_fault:
@@ -5197,21 +5310,25 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
         if (xn && access_type == 2)
             goto do_fault;
 
-        /* The simplified model uses AP[0] as an access control bit.  */
-        if ((regime_sctlr(env, mmu_idx) & SCTLR_AFE)
-                && (ap & 1) == 0) {
-            /* Access flag fault.  */
-            code = (code == 15) ? 6 : 3;
-            goto do_fault;
+        if (arm_feature(env, ARM_FEATURE_V6K) &&
+                (regime_sctlr(env, mmu_idx) & SCTLR_AFE)) {
+            /* The simplified model uses AP[0] as an access control bit.  */
+            if ((ap & 1) == 0) {
+                /* Access flag fault.  */
+                code = (code == 15) ? 6 : 3;
+                goto do_fault;
+            }
+            *prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1);
+        } else {
+            *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot);
+        }
+        if (*prot && !xn) {
+            *prot |= PAGE_EXEC;
         }
-        *prot = check_ap(env, mmu_idx, ap, domain_prot, access_type);
-        if (!*prot) {
+        if (!(*prot & (1 << access_type))) {
             /* Access permission fault.  */
             goto do_fault;
         }
-        if (!xn) {
-            *prot |= PAGE_EXEC;
-        }
     }
     *phys_ptr = phys_addr;
     return 0;
@@ -5249,8 +5366,8 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
     int32_t granule_sz = 9;
     int32_t va_size = 32;
     int32_t tbi = 0;
-    bool is_user;
     TCR *tcr = regime_tcr(env, mmu_idx);
+    int ap, ns, xn, pxn;
 
     /* TODO:
      * This code assumes we're either a 64-bit EL1 or a 32-bit PL1;
@@ -5411,7 +5528,7 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         if (extract32(tableattrs, 2, 1)) {
             attrs &= ~(1 << 4);
         }
-        /* Since we're always in the Non-secure state, NSTable is ignored. */
+        attrs |= extract32(tableattrs, 4, 1) << 3; /* NS */
         break;
     }
     /* Here descaddr is the final physical address, and attributes
@@ -5422,31 +5539,18 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         /* Access flag */
         goto do_fault;
     }
+
+    ap = extract32(attrs, 4, 2);
+    ns = extract32(attrs, 3, 1);
+    xn = extract32(attrs, 12, 1);
+    pxn = extract32(attrs, 11, 1);
+
+    *prot = get_S1prot(env, mmu_idx, va_size == 64, ap, ns, xn, pxn);
+
     fault_type = permission_fault;
-    is_user = regime_is_user(env, mmu_idx);
-    if (is_user && !(attrs & (1 << 4))) {
-        /* Unprivileged access not enabled */
+    if (!(*prot & (1 << access_type))) {
         goto do_fault;
     }
-    *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
-    if ((arm_feature(env, ARM_FEATURE_V8) && is_user && (attrs & (1 << 12))) ||
-        (!arm_feature(env, ARM_FEATURE_V8) && (attrs & (1 << 12))) ||
-        (!is_user && (attrs & (1 << 11)))) {
-        /* XN/UXN or PXN. Since we only implement EL0/EL1 we unconditionally
-         * treat XN/UXN as UXN for v8.
-         */
-        if (access_type == 2) {
-            goto do_fault;
-        }
-        *prot &= ~PAGE_EXEC;
-    }
-    if (attrs & (1 << 5)) {
-        /* Write access forbidden */
-        if (access_type == 1) {
-            goto do_fault;
-        }
-        *prot &= ~PAGE_WRITE;
-    }
 
     *phys_ptr = descaddr;
     *page_size_ptr = page_size;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 381d89624f..9116529306 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -8859,17 +8859,23 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
         case 0x08:
         case 0x09:
             {
-                int j, n, user, loaded_base;
+                int j, n, loaded_base;
+                bool exc_return = false;
+                bool is_load = extract32(insn, 20, 1);
+                bool user = false;
                 TCGv_i32 loaded_var;
                 /* load/store multiple words */
                 /* XXX: store correct base if write back */
-                user = 0;
                 if (insn & (1 << 22)) {
+                    /* LDM (user), LDM (exception return) and STM (user) */
                     if (IS_USER(s))
                         goto illegal_op; /* only usable in supervisor mode */
 
-                    if ((insn & (1 << 15)) == 0)
-                        user = 1;
+                    if (is_load && extract32(insn, 15, 1)) {
+                        exc_return = true;
+                    } else {
+                        user = true;
+                    }
                 }
                 rn = (insn >> 16) & 0xf;
                 addr = load_reg(s, rn);
@@ -8903,7 +8909,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
                 j = 0;
                 for(i=0;i<16;i++) {
                     if (insn & (1 << i)) {
-                        if (insn & (1 << 20)) {
+                        if (is_load) {
                             /* load */
                             tmp = tcg_temp_new_i32();
                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
@@ -8968,7 +8974,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
                 if (loaded_base) {
                     store_reg(s, rn, loaded_var);
                 }
-                if ((insn & (1 << 22)) && !user) {
+                if (exc_return) {
                     /* Restore CPSR from SPSR.  */
                     tmp = load_cpu_field(spsr);
                     gen_set_cpsr(tmp, CPSR_ERET_MASK);
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 067917c396..37c11103a6 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -980,8 +980,11 @@ static void tcg_constant_folding(TCGContext *s)
             if (temps_are_copies(args[1], args[2])) {
                 if (temps_are_copies(args[0], args[1])) {
                     tcg_op_remove(s, op);
-                } else {
+                } else if (temps[args[1]].state != TCG_TEMP_CONST) {
                     tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
+                } else {
+                    tcg_opt_gen_movi(s, op, args, opc,
+                                     args[0], temps[args[1]].val);
                 }
                 continue;
             }
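One way to read the tcg/optimize.c hunk: when both value operands of a movcond are copies of one another, the op already degenerated into a plain move; now, if the source temp is additionally tracked as constant, the optimizer folds all the way to a movi. A sketch in informal TCG-op notation (the temp names and the constant are invented):

    movcond_i32 t0, t1, t2, t3, t3, lt   # both value operands are copies of t3,
                                         # and t3 is known to hold const 0x20
    # old folding:  mov_i32  t0, t3
    # new folding:  movi_i32 t0, 0x20    # constant propagated directly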