diff options
-rw-r--r-- | migration/migration.c | 36 | ||||
-rw-r--r-- | migration/migration.h | 16 | ||||
-rw-r--r-- | migration/options.c | 9 | ||||
-rw-r--r-- | migration/qemu-file.c | 15 | ||||
-rw-r--r-- | migration/ram.c | 33 | ||||
-rw-r--r-- | migration/rdma.c | 1023 | ||||
-rw-r--r-- | migration/savevm.c | 3 | ||||
-rw-r--r-- | migration/trace-events | 10 | ||||
-rw-r--r-- | qapi/migration.json | 5 | ||||
-rw-r--r-- | tests/qtest/libqtest.c | 33 | ||||
-rw-r--r-- | tests/qtest/libqtest.h | 28 | ||||
-rw-r--r-- | tests/qtest/meson.build | 1 | ||||
-rw-r--r-- | tests/qtest/migration-helpers.c | 60 | ||||
-rw-r--r-- | tests/qtest/migration-helpers.h | 10 | ||||
-rw-r--r-- | tests/qtest/migration-test.c | 39 | ||||
-rw-r--r-- | tests/qtest/virtio-net-failover.c | 77 |
16 files changed, 757 insertions, 641 deletions
diff --git a/migration/migration.c b/migration/migration.c index 585d3c8f55..1c6c81ad49 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -431,13 +431,16 @@ void migrate_add_address(SocketAddress *address) static void qemu_start_incoming_migration(const char *uri, Error **errp) { const char *p = NULL; + MigrationIncomingState *mis = migration_incoming_get_current(); /* URI is not suitable for migration? */ if (!migration_channels_and_uri_compatible(uri, errp)) { return; } - qapi_event_send_migration(MIGRATION_STATUS_SETUP); + migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, + MIGRATION_STATUS_SETUP); + if (strstart(uri, "tcp:", &p) || strstart(uri, "unix:", NULL) || strstart(uri, "vsock:", NULL)) { @@ -531,7 +534,7 @@ process_incoming_migration_co(void *opaque) mis->largest_page_size = qemu_ram_pagesize_largest(); postcopy_state_set(POSTCOPY_INCOMING_NONE); - migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, + migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); mis->loadvm_co = qemu_coroutine_self(); @@ -1057,9 +1060,6 @@ static void fill_source_migration_info(MigrationInfo *info) break; case MIGRATION_STATUS_FAILED: info->has_status = true; - if (s->error) { - info->error_desc = g_strdup(error_get_pretty(s->error)); - } break; case MIGRATION_STATUS_CANCELLED: info->has_status = true; @@ -1069,6 +1069,11 @@ static void fill_source_migration_info(MigrationInfo *info) break; } info->status = state; + + QEMU_LOCK_GUARD(&s->error_mutex); + if (s->error) { + info->error_desc = g_strdup(error_get_pretty(s->error)); + } } static void fill_destination_migration_info(MigrationInfo *info) @@ -1229,6 +1234,13 @@ void migrate_set_error(MigrationState *s, const Error *error) } } +bool migrate_has_error(MigrationState *s) +{ + /* The lock is not helpful here, but still follow the rule */ + QEMU_LOCK_GUARD(&s->error_mutex); + return qatomic_read(&s->error); +} + static void migrate_error_free(MigrationState *s) { QEMU_LOCK_GUARD(&s->error_mutex); @@ -1751,6 +1763,16 @@ static void mark_source_rp_bad(MigrationState *s) s->rp_state.error = true; } +void migration_rp_wait(MigrationState *s) +{ + qemu_sem_wait(&s->rp_state.rp_sem); +} + +void migration_rp_kick(MigrationState *s) +{ + qemu_sem_post(&s->rp_state.rp_sem); +} + static struct rp_cmd_args { ssize_t len; /* -1 = variable */ const char *name; @@ -1823,7 +1845,7 @@ static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) MIGRATION_STATUS_POSTCOPY_ACTIVE); /* Notify send thread that time to continue send pages */ - qemu_sem_post(&s->rp_state.rp_sem); + migration_rp_kick(s); return 0; } @@ -2452,7 +2474,7 @@ static int postcopy_resume_handshake(MigrationState *s) qemu_savevm_send_postcopy_resume(s->to_dst_file); while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { - qemu_sem_wait(&s->rp_state.rp_sem); + migration_rp_wait(s); } if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { diff --git a/migration/migration.h b/migration/migration.h index 972597f4de..cd5534337c 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -316,6 +316,12 @@ struct MigrationState { * be cleared in the rp_thread! */ bool rp_thread_created; + /* + * Used to synchronize between migration main thread and return + * path thread. The migration thread can wait() on this sem, while + * other threads (e.g., return path thread) can kick it using a + * post(). + */ QemuSemaphore rp_sem; /* * We post to this when we got one PONG from dest. So far it's an @@ -476,6 +482,7 @@ bool migration_has_all_channels(void); uint64_t migrate_max_downtime(void); void migrate_set_error(MigrationState *s, const Error *error); +bool migrate_has_error(MigrationState *s); void migrate_fd_connect(MigrationState *s, Error *error_in); @@ -526,4 +533,13 @@ void migration_populate_vfio_info(MigrationInfo *info); void migration_reset_vfio_bytes_transferred(void); void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); +/* Migration thread waiting for return path thread. */ +void migration_rp_wait(MigrationState *s); +/* + * Kick the migration thread waiting for return path messages. NOTE: the + * name can be slightly confusing (when read as "kick the rp thread"), just + * to remember the target is always the migration thread. + */ +void migration_rp_kick(MigrationState *s); + #endif diff --git a/migration/options.c b/migration/options.c index 1d1e1321b0..6bbfd4853d 100644 --- a/migration/options.c +++ b/migration/options.c @@ -1408,20 +1408,25 @@ void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) { MigrationParameters tmp; - /* TODO Rewrite "" to null instead */ + /* TODO Rewrite "" to null instead for all three tls_* parameters */ if (params->tls_creds && params->tls_creds->type == QTYPE_QNULL) { qobject_unref(params->tls_creds->u.n); params->tls_creds->type = QTYPE_QSTRING; params->tls_creds->u.s = strdup(""); } - /* TODO Rewrite "" to null instead */ if (params->tls_hostname && params->tls_hostname->type == QTYPE_QNULL) { qobject_unref(params->tls_hostname->u.n); params->tls_hostname->type = QTYPE_QSTRING; params->tls_hostname->u.s = strdup(""); } + if (params->tls_authz + && params->tls_authz->type == QTYPE_QNULL) { + qobject_unref(params->tls_authz->u.n); + params->tls_authz->type = QTYPE_QSTRING; + params->tls_authz->u.s = strdup(""); + } migrate_params_test_apply(params, &tmp); diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 5e8207dae4..7fb659296f 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -142,15 +142,24 @@ void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks) * * Return negative error value if there has been an error on previous * operations, return 0 if no error happened. - * Optional, it returns Error* in errp, but it may be NULL even if return value - * is not 0. * + * If errp is specified, a verbose error message will be copied over. */ static int qemu_file_get_error_obj(QEMUFile *f, Error **errp) { + if (!f->last_error) { + return 0; + } + + /* There is an error */ if (errp) { - *errp = f->last_error_obj ? error_copy(f->last_error_obj) : NULL; + if (f->last_error_obj) { + *errp = error_copy(f->last_error_obj); + } else { + error_setg_errno(errp, -f->last_error, "Channel error"); + } } + return f->last_error; } diff --git a/migration/ram.c b/migration/ram.c index e4bfd39f08..2f5ce4d60b 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -394,6 +394,14 @@ struct RAMState { /* Queue of outstanding page requests from the destination */ QemuMutex src_page_req_mutex; QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests; + + /* + * This is only used when postcopy is in recovery phase, to communicate + * between the migration thread and the return path thread on dirty + * bitmap synchronizations. This field is unused in other stages of + * RAM migration. + */ + unsigned int postcopy_bmap_sync_requested; }; typedef struct RAMState RAMState; @@ -4119,21 +4127,21 @@ static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs) { RAMBlock *block; QEMUFile *file = s->to_dst_file; - int ramblock_count = 0; trace_ram_dirty_bitmap_sync_start(); + qatomic_set(&rs->postcopy_bmap_sync_requested, 0); RAMBLOCK_FOREACH_NOT_IGNORED(block) { qemu_savevm_send_recv_bitmap(file, block->idstr); trace_ram_dirty_bitmap_request(block->idstr); - ramblock_count++; + qatomic_inc(&rs->postcopy_bmap_sync_requested); } trace_ram_dirty_bitmap_sync_wait(); /* Wait until all the ramblocks' dirty bitmap synced */ - while (ramblock_count--) { - qemu_sem_wait(&s->rp_state.rp_sem); + while (qatomic_read(&rs->postcopy_bmap_sync_requested)) { + migration_rp_wait(s); } trace_ram_dirty_bitmap_sync_complete(); @@ -4141,11 +4149,6 @@ static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs) return 0; } -static void ram_dirty_bitmap_reload_notify(MigrationState *s) -{ - qemu_sem_post(&s->rp_state.rp_sem); -} - /* * Read the received bitmap, revert it as the initial dirty bitmap. * This is only used when the postcopy migration is paused but wants @@ -4159,6 +4162,7 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS; uint64_t local_size = DIV_ROUND_UP(nbits, 8); uint64_t size, end_mark; + RAMState *rs = ram_state; trace_ram_dirty_bitmap_reload_begin(block->idstr); @@ -4225,11 +4229,16 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) /* We'll recalculate migration_dirty_pages in ram_state_resume_prepare(). */ trace_ram_dirty_bitmap_reload_complete(block->idstr); + qatomic_dec(&rs->postcopy_bmap_sync_requested); + /* - * We succeeded to sync bitmap for current ramblock. If this is - * the last one to sync, we need to notify the main send thread. + * We succeeded to sync bitmap for current ramblock. Always kick the + * migration thread to check whether all requested bitmaps are + * reloaded. NOTE: it's racy to only kick when requested==0, because + * we don't know whether the migration thread may still be increasing + * it. */ - ram_dirty_bitmap_reload_notify(s); + migration_rp_kick(s); ret = 0; out: diff --git a/migration/rdma.c b/migration/rdma.c index cd5e1afe60..f6fc226c9b 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -40,17 +40,6 @@ #include "options.h" #include <poll.h> -/* - * Print and error on both the Monitor and the Log file. - */ -#define ERROR(errp, fmt, ...) \ - do { \ - fprintf(stderr, "RDMA ERROR: " fmt "\n", ## __VA_ARGS__); \ - if (errp && (*(errp) == NULL)) { \ - error_setg(errp, "RDMA ERROR: " fmt, ## __VA_ARGS__); \ - } \ - } while (0) - #define RDMA_RESOLVE_TIMEOUT_MS 10000 /* Do not merge data if larger than this. */ @@ -85,18 +74,6 @@ */ static uint32_t known_capabilities = RDMA_CAPABILITY_PIN_ALL; -#define CHECK_ERROR_STATE() \ - do { \ - if (rdma->error_state) { \ - if (!rdma->error_reported) { \ - error_report("RDMA is in an error state waiting migration" \ - " to abort!"); \ - rdma->error_reported = 1; \ - } \ - return rdma->error_state; \ - } \ - } while (0) - /* * A work request ID is 64-bits and we split up these bits * into 3 parts: @@ -133,13 +110,6 @@ enum { RDMA_WRID_RECV_CONTROL = 4000, }; -static const char *wrid_desc[] = { - [RDMA_WRID_NONE] = "NONE", - [RDMA_WRID_RDMA_WRITE] = "WRITE RDMA", - [RDMA_WRID_SEND_CONTROL] = "CONTROL SEND", - [RDMA_WRID_RECV_CONTROL] = "CONTROL RECV", -}; - /* * Work request IDs for IB SEND messages only (not RDMA writes). * This is used by the migration protocol to transmit @@ -371,9 +341,9 @@ typedef struct RDMAContext { * memory registration, then do not attempt any future work * and remember the error state. */ - int error_state; - int error_reported; - int received_error; + bool errored; + bool error_reported; + bool received_error; /* * Description of ram blocks used throughout the code. @@ -458,6 +428,16 @@ typedef struct QEMU_PACKED { uint64_t chunks; /* how many sequential chunks to register */ } RDMARegister; +static bool rdma_errored(RDMAContext *rdma) +{ + if (rdma->errored && !rdma->error_reported) { + error_report("RDMA is in an error state waiting migration" + " to abort!"); + rdma->error_reported = true; + } + return rdma->errored; +} + static void register_to_network(RDMAContext *rdma, RDMARegister *reg) { RDMALocalBlock *local_block; @@ -535,11 +515,12 @@ static void network_to_result(RDMARegisterResult *result) result->host_addr = ntohll(result->host_addr); }; -const char *print_wrid(int wrid); static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, uint8_t *data, RDMAControlHeader *resp, int *resp_idx, - int (*callback)(RDMAContext *rdma)); + int (*callback)(RDMAContext *rdma, + Error **errp), + Error **errp); static inline uint64_t ram_chunk_index(const uint8_t *start, const uint8_t *host) @@ -567,9 +548,9 @@ static inline uint8_t *ram_chunk_end(const RDMALocalBlock *rdma_ram_block, return result; } -static int rdma_add_block(RDMAContext *rdma, const char *block_name, - void *host_addr, - ram_addr_t block_offset, uint64_t length) +static void rdma_add_block(RDMAContext *rdma, const char *block_name, + void *host_addr, + ram_addr_t block_offset, uint64_t length) { RDMALocalBlocks *local = &rdma->local_ram_blocks; RDMALocalBlock *block; @@ -623,8 +604,6 @@ static int rdma_add_block(RDMAContext *rdma, const char *block_name, block->nb_chunks); local->nb_blocks++; - - return 0; } /* @@ -638,7 +617,8 @@ static int qemu_rdma_init_one_block(RAMBlock *rb, void *opaque) void *host_addr = qemu_ram_get_host_addr(rb); ram_addr_t block_offset = qemu_ram_get_offset(rb); ram_addr_t length = qemu_ram_get_used_length(rb); - return rdma_add_block(opaque, block_name, host_addr, block_offset, length); + rdma_add_block(opaque, block_name, host_addr, block_offset, length); + return 0; } /* @@ -646,7 +626,7 @@ static int qemu_rdma_init_one_block(RAMBlock *rb, void *opaque) * identify chunk boundaries inside each RAMBlock and also be referenced * during dynamic page registration. */ -static int qemu_rdma_init_ram_blocks(RDMAContext *rdma) +static void qemu_rdma_init_ram_blocks(RDMAContext *rdma) { RDMALocalBlocks *local = &rdma->local_ram_blocks; int ret; @@ -654,21 +634,18 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma) assert(rdma->blockmap == NULL); memset(local, 0, sizeof *local); ret = foreach_not_ignored_block(qemu_rdma_init_one_block, rdma); - if (ret) { - return ret; - } + assert(!ret); trace_qemu_rdma_init_ram_blocks(local->nb_blocks); rdma->dest_blocks = g_new0(RDMADestBlock, rdma->local_ram_blocks.nb_blocks); local->init = true; - return 0; } /* * Note: If used outside of cleanup, the caller must ensure that the destination * block structures are also updated */ -static int rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block) +static void rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block) { RDMALocalBlocks *local = &rdma->local_ram_blocks; RDMALocalBlock *old = local->block; @@ -754,43 +731,34 @@ static int rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block) &local->block[x]); } } - - return 0; } /* - * Put in the log file which RDMA device was opened and the details - * associated with that device. + * Trace RDMA device open, with device details. */ static void qemu_rdma_dump_id(const char *who, struct ibv_context *verbs) { struct ibv_port_attr port; if (ibv_query_port(verbs, 1, &port)) { - error_report("Failed to query port information"); + trace_qemu_rdma_dump_id_failed(who); return; } - printf("%s RDMA Device opened: kernel name %s " - "uverbs device name %s, " - "infiniband_verbs class device path %s, " - "infiniband class device path %s, " - "transport: (%d) %s\n", - who, + trace_qemu_rdma_dump_id(who, verbs->device->name, verbs->device->dev_name, verbs->device->dev_path, verbs->device->ibdev_path, port.link_layer, - (port.link_layer == IBV_LINK_LAYER_INFINIBAND) ? "Infiniband" : - ((port.link_layer == IBV_LINK_LAYER_ETHERNET) - ? "Ethernet" : "Unknown")); + port.link_layer == IBV_LINK_LAYER_INFINIBAND ? "Infiniband" + : port.link_layer == IBV_LINK_LAYER_ETHERNET ? "Ethernet" + : "Unknown"); } /* - * Put in the log file the RDMA gid addressing information, - * useful for folks who have trouble understanding the - * RDMA device hierarchy in the kernel. + * Trace RDMA gid addressing information. + * Useful for understanding the RDMA device hierarchy in the kernel. */ static void qemu_rdma_dump_gid(const char *who, struct rdma_cm_id *id) { @@ -867,18 +835,27 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp) for (x = 0; x < num_devices; x++) { verbs = ibv_open_device(dev_list[x]); + /* + * ibv_open_device() is not documented to set errno. If + * it does, it's somebody else's doc bug. If it doesn't, + * the use of errno below is wrong. + * TODO Find out whether ibv_open_device() sets errno. + */ if (!verbs) { if (errno == EPERM) { continue; } else { - return -EINVAL; + error_setg_errno(errp, errno, + "could not open RDMA device context"); + return -1; } } if (ibv_query_port(verbs, 1, &port_attr)) { ibv_close_device(verbs); - ERROR(errp, "Could not query initial IB port"); - return -EINVAL; + error_setg(errp, + "RDMA ERROR: Could not query initial IB port"); + return -1; } if (port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) { @@ -893,17 +870,18 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp) if (roce_found) { if (ib_found) { - fprintf(stderr, "WARN: migrations may fail:" - " IPv6 over RoCE / iWARP in linux" - " is broken. But since you appear to have a" - " mixed RoCE / IB environment, be sure to only" - " migrate over the IB fabric until the kernel " - " fixes the bug.\n"); + warn_report("migrations may fail:" + " IPv6 over RoCE / iWARP in linux" + " is broken. But since you appear to have a" + " mixed RoCE / IB environment, be sure to only" + " migrate over the IB fabric until the kernel " + " fixes the bug."); } else { - ERROR(errp, "You only have RoCE / iWARP devices in your systems" - " and your management software has specified '[::]'" - ", but IPv6 over RoCE / iWARP is not supported in Linux."); - return -ENONET; + error_setg(errp, "RDMA ERROR: " + "You only have RoCE / iWARP devices in your systems" + " and your management software has specified '[::]'" + ", but IPv6 over RoCE / iWARP is not supported in Linux."); + return -1; } } @@ -918,14 +896,15 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp) /* IB ports start with 1, not 0 */ if (ibv_query_port(verbs, 1, &port_attr)) { - ERROR(errp, "Could not query initial IB port"); - return -EINVAL; + error_setg(errp, "RDMA ERROR: Could not query initial IB port"); + return -1; } if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) { - ERROR(errp, "Linux kernel's RoCE / iWARP does not support IPv6 " - "(but patches on linux-rdma in progress)"); - return -ENONET; + error_setg(errp, "RDMA ERROR: " + "Linux kernel's RoCE / iWARP does not support IPv6 " + "(but patches on linux-rdma in progress)"); + return -1; } #endif @@ -940,6 +919,7 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp) */ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp) { + Error *err = NULL; int ret; struct rdma_addrinfo *res; char port_str[16]; @@ -948,21 +928,21 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp) struct rdma_addrinfo *e; if (rdma->host == NULL || !strcmp(rdma->host, "")) { - ERROR(errp, "RDMA hostname has not been set"); - return -EINVAL; + error_setg(errp, "RDMA ERROR: RDMA hostname has not been set"); + return -1; } /* create CM channel */ rdma->channel = rdma_create_event_channel(); if (!rdma->channel) { - ERROR(errp, "could not create CM channel"); - return -EINVAL; + error_setg(errp, "RDMA ERROR: could not create CM channel"); + return -1; } /* create CM id */ ret = rdma_create_id(rdma->channel, &rdma->cm_id, NULL, RDMA_PS_TCP); - if (ret) { - ERROR(errp, "could not create channel id"); + if (ret < 0) { + error_setg(errp, "RDMA ERROR: could not create channel id"); goto err_resolve_create_id; } @@ -970,31 +950,42 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp) port_str[15] = '\0'; ret = rdma_getaddrinfo(rdma->host, port_str, NULL, &res); - if (ret < 0) { - ERROR(errp, "could not rdma_getaddrinfo address %s", rdma->host); + if (ret) { + error_setg(errp, "RDMA ERROR: could not rdma_getaddrinfo address %s", + rdma->host); goto err_resolve_get_addr; } + /* Try all addresses, saving the first error in @err */ for (e = res; e != NULL; e = e->ai_next) { + Error **local_errp = err ? NULL : &err; + inet_ntop(e->ai_family, &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip); trace_qemu_rdma_resolve_host_trying(rdma->host, ip); ret = rdma_resolve_addr(rdma->cm_id, NULL, e->ai_dst_addr, RDMA_RESOLVE_TIMEOUT_MS); - if (!ret) { + if (ret >= 0) { if (e->ai_family == AF_INET6) { - ret = qemu_rdma_broken_ipv6_kernel(rdma->cm_id->verbs, errp); - if (ret) { + ret = qemu_rdma_broken_ipv6_kernel(rdma->cm_id->verbs, + local_errp); + if (ret < 0) { continue; } } + error_free(err); goto route; } } rdma_freeaddrinfo(res); - ERROR(errp, "could not resolve address %s", rdma->host); + if (err) { + error_propagate(errp, err); + } else { + error_setg(errp, "RDMA ERROR: could not resolve address %s", + rdma->host); + } goto err_resolve_get_addr; route: @@ -1002,38 +993,37 @@ route: qemu_rdma_dump_gid("source_resolve_addr", rdma->cm_id); ret = rdma_get_cm_event(rdma->channel, &cm_event); - if (ret) { - ERROR(errp, "could not perform event_addr_resolved"); + if (ret < 0) { + error_setg(errp, "RDMA ERROR: could not perform event_addr_resolved"); goto err_resolve_get_addr; } if (cm_event->event != RDMA_CM_EVENT_ADDR_RESOLVED) { - ERROR(errp, "result not equal to event_addr_resolved %s", - rdma_event_str(cm_event->event)); - error_report("rdma_resolve_addr"); + error_setg(errp, + "RDMA ERROR: result not equal to event_addr_resolved %s", + rdma_event_str(cm_event->event)); rdma_ack_cm_event(cm_event); - ret = -EINVAL; goto err_resolve_get_addr; } rdma_ack_cm_event(cm_event); /* resolve route */ ret = rdma_resolve_route(rdma->cm_id, RDMA_RESOLVE_TIMEOUT_MS); - if (ret) { - ERROR(errp, "could not resolve rdma route"); + if (ret < 0) { + error_setg(errp, "RDMA ERROR: could not resolve rdma route"); goto err_resolve_get_addr; } ret = rdma_get_cm_event(rdma->channel, &cm_event); - if (ret) { - ERROR(errp, "could not perform event_route_resolved"); + if (ret < 0) { + error_setg(errp, "RDMA ERROR: could not perform event_route_resolved"); goto err_resolve_get_addr; } if (cm_event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) { - ERROR(errp, "result not equal to event_route_resolved: %s", - rdma_event_str(cm_event->event)); + error_setg(errp, "RDMA ERROR: " + "result not equal to event_route_resolved: %s", + rdma_event_str(cm_event->event)); rdma_ack_cm_event(cm_event); - ret = -EINVAL; goto err_resolve_get_addr; } rdma_ack_cm_event(cm_event); @@ -1048,25 +1038,25 @@ err_resolve_get_addr: err_resolve_create_id: rdma_destroy_event_channel(rdma->channel); rdma->channel = NULL; - return ret; + return -1; } /* * Create protection domain and completion queues */ -static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma) +static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma, Error **errp) { /* allocate pd */ rdma->pd = ibv_alloc_pd(rdma->verbs); if (!rdma->pd) { - error_report("failed to allocate protection domain"); + error_setg(errp, "failed to allocate protection domain"); return -1; } /* create receive completion channel */ rdma->recv_comp_channel = ibv_create_comp_channel(rdma->verbs); if (!rdma->recv_comp_channel) { - error_report("failed to allocate receive completion channel"); + error_setg(errp, "failed to allocate receive completion channel"); goto err_alloc_pd_cq; } @@ -1076,21 +1066,21 @@ static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma) rdma->recv_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3), NULL, rdma->recv_comp_channel, 0); if (!rdma->recv_cq) { - error_report("failed to allocate receive completion queue"); + error_setg(errp, "failed to allocate receive completion queue"); goto err_alloc_pd_cq; } /* create send completion channel */ rdma->send_comp_channel = ibv_create_comp_channel(rdma->verbs); if (!rdma->send_comp_channel) { - error_report("failed to allocate send completion channel"); + error_setg(errp, "failed to allocate send completion channel"); goto err_alloc_pd_cq; } rdma->send_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3), NULL, rdma->send_comp_channel, 0); if (!rdma->send_cq) { - error_report("failed to allocate send completion queue"); + error_setg(errp, "failed to allocate send completion queue"); goto err_alloc_pd_cq; } @@ -1134,7 +1124,7 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma) attr.qp_type = IBV_QPT_RC; ret = rdma_create_qp(rdma->cm_id, rdma->pd, &attr); - if (ret) { + if (ret < 0) { return -1; } @@ -1176,15 +1166,11 @@ static void qemu_rdma_advise_prefetch_mr(struct ibv_pd *pd, uint64_t addr, ret = ibv_advise_mr(pd, advice, IBV_ADVISE_MR_FLAG_FLUSH, &sg_list, 1); /* ignore the error */ - if (ret) { - trace_qemu_rdma_advise_mr(name, len, addr, strerror(errno)); - } else { - trace_qemu_rdma_advise_mr(name, len, addr, "successed"); - } + trace_qemu_rdma_advise_mr(name, len, addr, strerror(ret)); #endif } -static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma) +static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma, Error **errp) { int i; RDMALocalBlocks *local = &rdma->local_ram_blocks; @@ -1197,7 +1183,12 @@ static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma) local->block[i].local_host_addr, local->block[i].length, access ); - + /* + * ibv_reg_mr() is not documented to set errno. If it does, + * it's somebody else's doc bug. If it doesn't, the use of + * errno below is wrong. + * TODO Find out whether ibv_reg_mr() sets errno. + */ if (!local->block[i].mr && errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { access |= IBV_ACCESS_ON_DEMAND; @@ -1219,16 +1210,16 @@ static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma) } if (!local->block[i].mr) { - perror("Failed to register local dest ram block!"); - break; + error_setg_errno(errp, errno, + "Failed to register local dest ram block!"); + goto err; } rdma->total_registrations++; } - if (i >= local->nb_blocks) { - return 0; - } + return 0; +err: for (i--; i >= 0; i--) { ibv_dereg_mr(local->block[i].mr); local->block[i].mr = NULL; @@ -1245,15 +1236,13 @@ static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma) * * Once the block is found, also identify which 'chunk' within that * block that the page belongs to. - * - * This search cannot fail or the migration will fail. */ -static int qemu_rdma_search_ram_block(RDMAContext *rdma, - uintptr_t block_offset, - uint64_t offset, - uint64_t length, - uint64_t *block_index, - uint64_t *chunk_index) +static void qemu_rdma_search_ram_block(RDMAContext *rdma, + uintptr_t block_offset, + uint64_t offset, + uint64_t length, + uint64_t *block_index, + uint64_t *chunk_index) { uint64_t current_addr = block_offset + offset; RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap, @@ -1265,8 +1254,6 @@ static int qemu_rdma_search_ram_block(RDMAContext *rdma, *block_index = block->index; *chunk_index = ram_chunk_index(block->local_host_addr, block->local_host_addr + (current_addr - block->offset)); - - return 0; } /* @@ -1309,6 +1296,12 @@ static int qemu_rdma_register_and_get_keys(RDMAContext *rdma, trace_qemu_rdma_register_and_get_keys(len, chunk_start); block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access); + /* + * ibv_reg_mr() is not documented to set errno. If it does, + * it's somebody else's doc bug. If it doesn't, the use of + * errno below is wrong. + * TODO Find out whether ibv_reg_mr() sets errno. + */ if (!block->pmr[chunk] && errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { access |= IBV_ACCESS_ON_DEMAND; @@ -1325,15 +1318,6 @@ static int qemu_rdma_register_and_get_keys(RDMAContext *rdma, } } if (!block->pmr[chunk]) { - perror("Failed to register chunk!"); - fprintf(stderr, "Chunk details: block: %d chunk index %d" - " start %" PRIuPTR " end %" PRIuPTR - " host %" PRIuPTR - " local %" PRIuPTR " registrations: %d\n", - block->index, chunk, (uintptr_t)chunk_start, - (uintptr_t)chunk_end, host_addr, - (uintptr_t)block->local_host_addr, - rdma->total_registrations); return -1; } rdma->total_registrations++; @@ -1360,18 +1344,9 @@ static int qemu_rdma_reg_control(RDMAContext *rdma, int idx) rdma->total_registrations++; return 0; } - error_report("qemu_rdma_reg_control failed"); return -1; } -const char *print_wrid(int wrid) -{ - if (wrid >= RDMA_WRID_RECV_CONTROL) { - return wrid_desc[RDMA_WRID_RECV_CONTROL]; - } - return wrid_desc[wrid]; -} - /* * Perform a non-optimized memory unregistration after every transfer * for demonstration purposes, only if pin-all is not requested. @@ -1385,6 +1360,8 @@ const char *print_wrid(int wrid) */ static int qemu_rdma_unregister_waiting(RDMAContext *rdma) { + Error *err = NULL; + while (rdma->unregistrations[rdma->unregister_current]) { int ret; uint64_t wr_id = rdma->unregistrations[rdma->unregister_current]; @@ -1434,17 +1411,19 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma) block->remote_keys[chunk] = 0; if (ret != 0) { - perror("unregistration chunk failed"); - return -ret; + error_report("unregistration chunk failed: %s", + strerror(ret)); + return -1; } rdma->total_registrations--; reg.key.chunk = chunk; register_to_network(rdma, ®); ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) ®, - &resp, NULL, NULL); + &resp, NULL, NULL, &err); if (ret < 0) { - return ret; + error_report_err(err); + return -1; } trace_qemu_rdma_unregister_waiting_complete(chunk); @@ -1469,8 +1448,8 @@ static uint64_t qemu_rdma_make_wrid(uint64_t wr_id, uint64_t index, * (of any kind) has completed. * Return the work request ID that completed. */ -static uint64_t qemu_rdma_poll(RDMAContext *rdma, struct ibv_cq *cq, - uint64_t *wr_id_out, uint32_t *byte_len) +static int qemu_rdma_poll(RDMAContext *rdma, struct ibv_cq *cq, + uint64_t *wr_id_out, uint32_t *byte_len) { int ret; struct ibv_wc wc; @@ -1484,24 +1463,19 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, struct ibv_cq *cq, } if (ret < 0) { - error_report("ibv_poll_cq return %d", ret); - return ret; + return -1; } wr_id = wc.wr_id & RDMA_WRID_TYPE_MASK; if (wc.status != IBV_WC_SUCCESS) { - fprintf(stderr, "ibv_poll_cq wc.status=%d %s!\n", - wc.status, ibv_wc_status_str(wc.status)); - fprintf(stderr, "ibv_poll_cq wrid=%s!\n", wrid_desc[wr_id]); - return -1; } if (rdma->control_ready_expected && (wr_id >= RDMA_WRID_RECV_CONTROL)) { - trace_qemu_rdma_poll_recv(wrid_desc[RDMA_WRID_RECV_CONTROL], - wr_id - RDMA_WRID_RECV_CONTROL, wr_id, rdma->nb_sent); + trace_qemu_rdma_poll_recv(wr_id - RDMA_WRID_RECV_CONTROL, wr_id, + rdma->nb_sent); rdma->control_ready_expected = 0; } @@ -1512,7 +1486,7 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, struct ibv_cq *cq, (wc.wr_id & RDMA_WRID_BLOCK_MASK) >> RDMA_WRID_BLOCK_SHIFT; RDMALocalBlock *block = &(rdma->local_ram_blocks.block[index]); - trace_qemu_rdma_poll_write(print_wrid(wr_id), wr_id, rdma->nb_sent, + trace_qemu_rdma_poll_write(wr_id, rdma->nb_sent, index, chunk, block->local_host_addr, (void *)(uintptr_t)block->remote_host_addr); @@ -1522,7 +1496,7 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, struct ibv_cq *cq, rdma->nb_sent--; } } else { - trace_qemu_rdma_poll_other(print_wrid(wr_id), wr_id, rdma->nb_sent); + trace_qemu_rdma_poll_other(wr_id, rdma->nb_sent); } *wr_id_out = wc.wr_id; @@ -1540,7 +1514,7 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma, struct ibv_comp_channel *comp_channel) { struct rdma_cm_event *cm_event; - int ret = -1; + int ret; /* * Coroutine doesn't start until migration_fd_process_incoming() @@ -1557,7 +1531,7 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma, * But we need to be able to handle 'cancel' or an error * without hanging forever. */ - while (!rdma->error_state && !rdma->received_error) { + while (!rdma->errored && !rdma->received_error) { GPollFD pfds[2]; pfds[0].fd = comp_channel->fd; pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; @@ -1577,18 +1551,14 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma, if (pfds[1].revents) { ret = rdma_get_cm_event(rdma->channel, &cm_event); - if (ret) { - error_report("failed to get cm event while wait " - "completion channel"); - return -EPIPE; + if (ret < 0) { + return -1; } - error_report("receive cm event while wait comp channel," - "cm event is %d", cm_event->event); if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { rdma_ack_cm_event(cm_event); - return -EPIPE; + return -1; } rdma_ack_cm_event(cm_event); } @@ -1600,30 +1570,29 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma, default: /* Error of some type - * I don't trust errno from qemu_poll_ns */ - error_report("%s: poll failed", __func__); - return -EPIPE; + return -1; } if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) { /* Bail out and let the cancellation happen */ - return -EPIPE; + return -1; } } } if (rdma->received_error) { - return -EPIPE; + return -1; } - return rdma->error_state; + return -rdma->errored; } -static struct ibv_comp_channel *to_channel(RDMAContext *rdma, int wrid) +static struct ibv_comp_channel *to_channel(RDMAContext *rdma, uint64_t wrid) { return wrid < RDMA_WRID_RECV_CONTROL ? rdma->send_comp_channel : rdma->recv_comp_channel; } -static struct ibv_cq *to_cq(RDMAContext *rdma, int wrid) +static struct ibv_cq *to_cq(RDMAContext *rdma, uint64_t wrid) { return wrid < RDMA_WRID_RECV_CONTROL ? rdma->send_cq : rdma->recv_cq; } @@ -1641,10 +1610,11 @@ static struct ibv_cq *to_cq(RDMAContext *rdma, int wrid) * completions only need to be recorded, but do not actually * need further processing. */ -static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, +static int qemu_rdma_block_for_wrid(RDMAContext *rdma, + uint64_t wrid_requested, uint32_t *byte_len) { - int num_cq_events = 0, ret = 0; + int num_cq_events = 0, ret; struct ibv_cq *cq; void *cq_ctx; uint64_t wr_id = RDMA_WRID_NONE, wr_id_in; @@ -1658,7 +1628,7 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, while (wr_id != wrid_requested) { ret = qemu_rdma_poll(rdma, poll_cq, &wr_id_in, byte_len); if (ret < 0) { - return ret; + return -1; } wr_id = wr_id_in & RDMA_WRID_TYPE_MASK; @@ -1667,8 +1637,7 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, break; } if (wr_id != wrid_requested) { - trace_qemu_rdma_block_for_wrid_miss(print_wrid(wrid_requested), - wrid_requested, print_wrid(wr_id), wr_id); + trace_qemu_rdma_block_for_wrid_miss(wrid_requested, wr_id); } } @@ -1678,20 +1647,18 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, while (1) { ret = qemu_rdma_wait_comp_channel(rdma, ch); - if (ret) { + if (ret < 0) { goto err_block_for_wrid; } ret = ibv_get_cq_event(ch, &cq, &cq_ctx); - if (ret) { - perror("ibv_get_cq_event"); + if (ret < 0) { goto err_block_for_wrid; } num_cq_events++; - ret = -ibv_req_notify_cq(cq, 0); - if (ret) { + if (ibv_req_notify_cq(cq, 0)) { goto err_block_for_wrid; } @@ -1707,8 +1674,7 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, break; } if (wr_id != wrid_requested) { - trace_qemu_rdma_block_for_wrid_miss(print_wrid(wrid_requested), - wrid_requested, print_wrid(wr_id), wr_id); + trace_qemu_rdma_block_for_wrid_miss(wrid_requested, wr_id); } } @@ -1728,8 +1694,8 @@ err_block_for_wrid: ibv_ack_cq_events(cq, num_cq_events); } - rdma->error_state = ret; - return ret; + rdma->errored = true; + return -1; } /* @@ -1737,9 +1703,10 @@ err_block_for_wrid: * containing some data and block until the post completes. */ static int qemu_rdma_post_send_control(RDMAContext *rdma, uint8_t *buf, - RDMAControlHeader *head) + RDMAControlHeader *head, + Error **errp) { - int ret = 0; + int ret; RDMAWorkRequestData *wr = &rdma->wr_data[RDMA_WRID_CONTROL]; struct ibv_send_wr *bad_wr; struct ibv_sge sge = { @@ -1777,23 +1744,25 @@ static int qemu_rdma_post_send_control(RDMAContext *rdma, uint8_t *buf, ret = ibv_post_send(rdma->qp, &send_wr, &bad_wr); if (ret > 0) { - error_report("Failed to use post IB SEND for control"); - return -ret; + error_setg(errp, "Failed to use post IB SEND for control"); + return -1; } ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_SEND_CONTROL, NULL); if (ret < 0) { - error_report("rdma migration: send polling control error"); + error_setg(errp, "rdma migration: send polling control error"); + return -1; } - return ret; + return 0; } /* * Post a RECV work request in anticipation of some future receipt * of data on the control channel. */ -static int qemu_rdma_post_recv_control(RDMAContext *rdma, int idx) +static int qemu_rdma_post_recv_control(RDMAContext *rdma, int idx, + Error **errp) { struct ibv_recv_wr *bad_wr; struct ibv_sge sge = { @@ -1810,6 +1779,7 @@ static int qemu_rdma_post_recv_control(RDMAContext *rdma, int idx) if (ibv_post_recv(rdma->qp, &recv_wr, &bad_wr)) { + error_setg(errp, "error posting control recv"); return -1; } @@ -1820,15 +1790,16 @@ static int qemu_rdma_post_recv_control(RDMAContext *rdma, int idx) * Block and wait for a RECV control channel message to arrive. */ static int qemu_rdma_exchange_get_response(RDMAContext *rdma, - RDMAControlHeader *head, int expecting, int idx) + RDMAControlHeader *head, uint32_t expecting, int idx, + Error **errp) { uint32_t byte_len; int ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RECV_CONTROL + idx, &byte_len); if (ret < 0) { - error_report("rdma migration: recv polling control error!"); - return ret; + error_setg(errp, "rdma migration: recv polling control error!"); + return -1; } network_to_control((void *) rdma->wr_data[idx].control); @@ -1840,22 +1811,23 @@ static int qemu_rdma_exchange_get_response(RDMAContext *rdma, trace_qemu_rdma_exchange_get_response_none(control_desc(head->type), head->type); } else if (head->type != expecting || head->type == RDMA_CONTROL_ERROR) { - error_report("Was expecting a %s (%d) control message" + error_setg(errp, "Was expecting a %s (%d) control message" ", but got: %s (%d), length: %d", control_desc(expecting), expecting, control_desc(head->type), head->type, head->len); if (head->type == RDMA_CONTROL_ERROR) { rdma->received_error = true; } - return -EIO; + return -1; } if (head->len > RDMA_CONTROL_MAX_BUFFER - sizeof(*head)) { - error_report("too long length: %d", head->len); - return -EINVAL; + error_setg(errp, "too long length: %d", head->len); + return -1; } if (sizeof(*head) + head->len != byte_len) { - error_report("Malformed length: %d byte_len %d", head->len, byte_len); - return -EINVAL; + error_setg(errp, "Malformed length: %d byte_len %d", + head->len, byte_len); + return -1; } return 0; @@ -1893,9 +1865,11 @@ static void qemu_rdma_move_header(RDMAContext *rdma, int idx, static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, uint8_t *data, RDMAControlHeader *resp, int *resp_idx, - int (*callback)(RDMAContext *rdma)) + int (*callback)(RDMAContext *rdma, + Error **errp), + Error **errp) { - int ret = 0; + int ret; /* * Wait until the dest is ready before attempting to deliver the message @@ -1906,9 +1880,9 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, ret = qemu_rdma_exchange_get_response(rdma, &resp_ignored, RDMA_CONTROL_READY, - RDMA_WRID_READY); + RDMA_WRID_READY, errp); if (ret < 0) { - return ret; + return -1; } } @@ -1916,31 +1890,27 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, * If the user is expecting a response, post a WR in anticipation of it. */ if (resp) { - ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_DATA); - if (ret) { - error_report("rdma migration: error posting" - " extra control recv for anticipated result!"); - return ret; + ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_DATA, errp); + if (ret < 0) { + return -1; } } /* * Post a WR to replace the one we just consumed for the READY message. */ - ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY); - if (ret) { - error_report("rdma migration: error posting first control recv!"); - return ret; + ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY, errp); + if (ret < 0) { + return -1; } /* * Deliver the control message that was requested. */ - ret = qemu_rdma_post_send_control(rdma, data, head); + ret = qemu_rdma_post_send_control(rdma, data, head, errp); if (ret < 0) { - error_report("Failed to send control buffer!"); - return ret; + return -1; } /* @@ -1949,18 +1919,19 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, if (resp) { if (callback) { trace_qemu_rdma_exchange_send_issue_callback(); - ret = callback(rdma); + ret = callback(rdma, errp); if (ret < 0) { - return ret; + return -1; } } trace_qemu_rdma_exchange_send_waiting(control_desc(resp->type)); ret = qemu_rdma_exchange_get_response(rdma, resp, - resp->type, RDMA_WRID_DATA); + resp->type, RDMA_WRID_DATA, + errp); if (ret < 0) { - return ret; + return -1; } qemu_rdma_move_header(rdma, RDMA_WRID_DATA, resp); @@ -1980,7 +1951,7 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, * control-channel message. */ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head, - int expecting) + uint32_t expecting, Error **errp) { RDMAControlHeader ready = { .len = 0, @@ -1992,21 +1963,20 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head, /* * Inform the source that we're ready to receive a message. */ - ret = qemu_rdma_post_send_control(rdma, NULL, &ready); + ret = qemu_rdma_post_send_control(rdma, NULL, &ready, errp); if (ret < 0) { - error_report("Failed to send control buffer!"); - return ret; + return -1; } /* * Block and wait for the message. */ ret = qemu_rdma_exchange_get_response(rdma, head, - expecting, RDMA_WRID_READY); + expecting, RDMA_WRID_READY, errp); if (ret < 0) { - return ret; + return -1; } qemu_rdma_move_header(rdma, RDMA_WRID_READY, head); @@ -2014,10 +1984,9 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head, /* * Post a new RECV work request to replace the one we just consumed. */ - ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY); - if (ret) { - error_report("rdma migration: error posting second control recv!"); - return ret; + ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY, errp); + if (ret < 0) { + return -1; } return 0; @@ -2031,7 +2000,7 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head, */ static int qemu_rdma_write_one(RDMAContext *rdma, int current_index, uint64_t current_addr, - uint64_t length) + uint64_t length, Error **errp) { struct ibv_sge sge; struct ibv_send_wr send_wr = { 0 }; @@ -2086,11 +2055,11 @@ retry: ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL); if (ret < 0) { - error_report("Failed to Wait for previous write to complete " + error_setg(errp, "Failed to Wait for previous write to complete " "block %d chunk %" PRIu64 " current %" PRIu64 " len %" PRIu64 " %d", current_index, chunk, sge.addr, length, rdma->nb_sent); - return ret; + return -1; } } @@ -2118,10 +2087,10 @@ retry: compress_to_network(rdma, &comp); ret = qemu_rdma_exchange_send(rdma, &head, - (uint8_t *) &comp, NULL, NULL, NULL); + (uint8_t *) &comp, NULL, NULL, NULL, errp); if (ret < 0) { - return -EIO; + return -1; } /* @@ -2155,17 +2124,17 @@ retry: register_to_network(rdma, ®); ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) ®, - &resp, ®_result_idx, NULL); + &resp, ®_result_idx, NULL, errp); if (ret < 0) { - return ret; + return -1; } /* try to overlap this single registration with the one we sent. */ if (qemu_rdma_register_and_get_keys(rdma, block, sge.addr, &sge.lkey, NULL, chunk, chunk_start, chunk_end)) { - error_report("cannot get lkey"); - return -EINVAL; + error_setg(errp, "cannot get lkey"); + return -1; } reg_result = (RDMARegisterResult *) @@ -2183,8 +2152,8 @@ retry: if (qemu_rdma_register_and_get_keys(rdma, block, sge.addr, &sge.lkey, NULL, chunk, chunk_start, chunk_end)) { - error_report("cannot get lkey!"); - return -EINVAL; + error_setg(errp, "cannot get lkey!"); + return -1; } } @@ -2195,8 +2164,8 @@ retry: if (qemu_rdma_register_and_get_keys(rdma, block, sge.addr, &sge.lkey, NULL, chunk, chunk_start, chunk_end)) { - error_report("cannot get lkey!"); - return -EINVAL; + error_setg(errp, "cannot get lkey!"); + return -1; } } @@ -2229,16 +2198,17 @@ retry: trace_qemu_rdma_write_one_queue_full(); ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL); if (ret < 0) { - error_report("rdma migration: failed to make " - "room in full send queue! %d", ret); - return ret; + error_setg(errp, "rdma migration: failed to make " + "room in full send queue!"); + return -1; } goto retry; } else if (ret > 0) { - perror("rdma migration: post rdma write failed"); - return -ret; + error_setg_errno(errp, ret, + "rdma migration: post rdma write failed"); + return -1; } set_bit(chunk, block->transit_bitmap); @@ -2265,7 +2235,7 @@ retry: * We support sending out multiple chunks at the same time. * Not all of them need to get signaled in the completion queue. */ -static int qemu_rdma_write_flush(RDMAContext *rdma) +static int qemu_rdma_write_flush(RDMAContext *rdma, Error **errp) { int ret; @@ -2273,11 +2243,11 @@ static int qemu_rdma_write_flush(RDMAContext *rdma) return 0; } - ret = qemu_rdma_write_one(rdma, - rdma->current_index, rdma->current_addr, rdma->current_length); + ret = qemu_rdma_write_one(rdma, rdma->current_index, rdma->current_addr, + rdma->current_length, errp); if (ret < 0) { - return ret; + return -1; } if (ret == 0) { @@ -2291,7 +2261,7 @@ static int qemu_rdma_write_flush(RDMAContext *rdma) return 0; } -static inline int qemu_rdma_buffer_mergable(RDMAContext *rdma, +static inline bool qemu_rdma_buffer_mergeable(RDMAContext *rdma, uint64_t offset, uint64_t len) { RDMALocalBlock *block; @@ -2299,11 +2269,11 @@ static inline int qemu_rdma_buffer_mergable(RDMAContext *rdma, uint8_t *chunk_end; if (rdma->current_index < 0) { - return 0; + return false; } if (rdma->current_chunk < 0) { - return 0; + return false; } block = &(rdma->local_ram_blocks.block[rdma->current_index]); @@ -2311,29 +2281,29 @@ static inline int qemu_rdma_buffer_mergable(RDMAContext *rdma, chunk_end = ram_chunk_end(block, rdma->current_chunk); if (rdma->current_length == 0) { - return 0; + return false; } /* * Only merge into chunk sequentially. */ if (offset != (rdma->current_addr + rdma->current_length)) { - return 0; + return false; } if (offset < block->offset) { - return 0; + return false; } if ((offset + len) > (block->offset + block->length)) { - return 0; + return false; } if ((host_addr + len) > chunk_end) { - return 0; + return false; } - return 1; + return true; } /* @@ -2348,7 +2318,7 @@ static inline int qemu_rdma_buffer_mergable(RDMAContext *rdma, */ static int qemu_rdma_write(RDMAContext *rdma, uint64_t block_offset, uint64_t offset, - uint64_t len) + uint64_t len, Error **errp) { uint64_t current_addr = block_offset + offset; uint64_t index = rdma->current_index; @@ -2356,20 +2326,16 @@ static int qemu_rdma_write(RDMAContext *rdma, int ret; /* If we cannot merge it, we flush the current buffer first. */ - if (!qemu_rdma_buffer_mergable(rdma, current_addr, len)) { - ret = qemu_rdma_write_flush(rdma); - if (ret) { - return ret; + if (!qemu_rdma_buffer_mergeable(rdma, current_addr, len)) { + ret = qemu_rdma_write_flush(rdma, errp); + if (ret < 0) { + return -1; } rdma->current_length = 0; rdma->current_addr = current_addr; - ret = qemu_rdma_search_ram_block(rdma, block_offset, - offset, len, &index, &chunk); - if (ret) { - error_report("ram block search failed"); - return ret; - } + qemu_rdma_search_ram_block(rdma, block_offset, + offset, len, &index, &chunk); rdma->current_index = index; rdma->current_chunk = chunk; } @@ -2379,7 +2345,7 @@ static int qemu_rdma_write(RDMAContext *rdma, /* flush it if buffer is too large */ if (rdma->current_length >= RDMA_MERGE_MAX) { - return qemu_rdma_write_flush(rdma); + return qemu_rdma_write_flush(rdma, errp); } return 0; @@ -2387,18 +2353,21 @@ static int qemu_rdma_write(RDMAContext *rdma, static void qemu_rdma_cleanup(RDMAContext *rdma) { + Error *err = NULL; int idx; if (rdma->cm_id && rdma->connected) { - if ((rdma->error_state || + if ((rdma->errored || migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) && !rdma->received_error) { RDMAControlHeader head = { .len = 0, .type = RDMA_CONTROL_ERROR, .repeat = 1, }; - error_report("Early error. Sending error."); - qemu_rdma_post_send_control(rdma, NULL, &head); + warn_report("Early error. Sending error."); + if (qemu_rdma_post_send_control(rdma, NULL, &head, &err) < 0) { + warn_report_err(err); + } } rdma_disconnect(rdma->cm_id); @@ -2484,7 +2453,6 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp) { int ret, idx; - Error *local_err = NULL, **temp = &local_err; /* * Will be validated against destination's actual capabilities @@ -2492,30 +2460,23 @@ static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp) */ rdma->pin_all = pin_all; - ret = qemu_rdma_resolve_host(rdma, temp); - if (ret) { + ret = qemu_rdma_resolve_host(rdma, errp); + if (ret < 0) { goto err_rdma_source_init; } - ret = qemu_rdma_alloc_pd_cq(rdma); - if (ret) { - ERROR(temp, "rdma migration: error allocating pd and cq! Your mlock()" - " limits may be too low. Please check $ ulimit -a # and " - "search for 'ulimit -l' in the output"); + ret = qemu_rdma_alloc_pd_cq(rdma, errp); + if (ret < 0) { goto err_rdma_source_init; } ret = qemu_rdma_alloc_qp(rdma); - if (ret) { - ERROR(temp, "rdma migration: error allocating qp!"); + if (ret < 0) { + error_setg(errp, "RDMA ERROR: rdma migration: error allocating qp!"); goto err_rdma_source_init; } - ret = qemu_rdma_init_ram_blocks(rdma); - if (ret) { - ERROR(temp, "rdma migration: error initializing ram blocks!"); - goto err_rdma_source_init; - } + qemu_rdma_init_ram_blocks(rdma); /* Build the hash that maps from offset to RAMBlock */ rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal); @@ -2527,9 +2488,10 @@ static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp) for (idx = 0; idx < RDMA_WRID_MAX; idx++) { ret = qemu_rdma_reg_control(rdma, idx); - if (ret) { - ERROR(temp, "rdma migration: error registering %d control!", - idx); + if (ret < 0) { + error_setg(errp, + "RDMA ERROR: rdma migration: error registering %d control!", + idx); goto err_rdma_source_init; } } @@ -2537,7 +2499,6 @@ static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp) return 0; err_rdma_source_init: - error_propagate(errp, local_err); qemu_rdma_cleanup(rdma); return -1; } @@ -2558,20 +2519,27 @@ static int qemu_get_cm_event_timeout(RDMAContext *rdma, } while (ret < 0 && errno == EINTR); if (ret == 0) { - ERROR(errp, "poll cm event timeout"); + error_setg(errp, "RDMA ERROR: poll cm event timeout"); return -1; } else if (ret < 0) { - ERROR(errp, "failed to poll cm event, errno=%i", errno); + error_setg(errp, "RDMA ERROR: failed to poll cm event, errno=%i", + errno); return -1; } else if (poll_fd.revents & POLLIN) { - return rdma_get_cm_event(rdma->channel, cm_event); + if (rdma_get_cm_event(rdma->channel, cm_event) < 0) { + error_setg(errp, "RDMA ERROR: failed to get cm event"); + return -1; + } + return 0; } else { - ERROR(errp, "no POLLIN event, revent=%x", poll_fd.revents); + error_setg(errp, "RDMA ERROR: no POLLIN event, revent=%x", + poll_fd.revents); return -1; } } -static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path) +static int qemu_rdma_connect(RDMAContext *rdma, bool return_path, + Error **errp) { RDMACapabilities cap = { .version = RDMA_CONTROL_VERSION_CURRENT, @@ -2596,16 +2564,15 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path) caps_to_network(&cap); - ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY); - if (ret) { - ERROR(errp, "posting second control recv"); + ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY, errp); + if (ret < 0) { goto err_rdma_source_connect; } ret = rdma_connect(rdma->cm_id, &conn_param); - if (ret) { - perror("rdma_connect"); - ERROR(errp, "connecting to destination!"); + if (ret < 0) { + error_setg_errno(errp, errno, + "RDMA ERROR: connecting to destination!"); goto err_rdma_source_connect; } @@ -2613,16 +2580,17 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path) ret = qemu_get_cm_event_timeout(rdma, &cm_event, 5000, errp); } else { ret = rdma_get_cm_event(rdma->channel, &cm_event); + if (ret < 0) { + error_setg_errno(errp, errno, + "RDMA ERROR: failed to get cm event"); + } } - if (ret) { - perror("rdma_get_cm_event after rdma_connect"); - ERROR(errp, "connecting to destination!"); + if (ret < 0) { goto err_rdma_source_connect; } if (cm_event->event != RDMA_CM_EVENT_ESTABLISHED) { - error_report("rdma_get_cm_event != EVENT_ESTABLISHED after rdma_connect"); - ERROR(errp, "connecting to destination!"); + error_setg(errp, "RDMA ERROR: connecting to destination!"); rdma_ack_cm_event(cm_event); goto err_rdma_source_connect; } @@ -2636,8 +2604,8 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path) * and disable them otherwise. */ if (rdma->pin_all && !(cap.flags & RDMA_CAPABILITY_PIN_ALL)) { - ERROR(errp, "Server cannot support pinning all memory. " - "Will register memory dynamically."); + warn_report("RDMA: Server cannot support pinning all memory. " + "Will register memory dynamically."); rdma->pin_all = false; } @@ -2656,6 +2624,7 @@ err_rdma_source_connect: static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp) { + Error *err = NULL; int ret, idx; struct rdma_cm_id *listen_id; char ip[40] = "unknown"; @@ -2669,22 +2638,22 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp) } if (!rdma->host || !rdma->host[0]) { - ERROR(errp, "RDMA host is not set!"); - rdma->error_state = -EINVAL; + error_setg(errp, "RDMA ERROR: RDMA host is not set!"); + rdma->errored = true; return -1; } /* create CM channel */ rdma->channel = rdma_create_event_channel(); if (!rdma->channel) { - ERROR(errp, "could not create rdma event channel"); - rdma->error_state = -EINVAL; + error_setg(errp, "RDMA ERROR: could not create rdma event channel"); + rdma->errored = true; return -1; } /* create CM id */ ret = rdma_create_id(rdma->channel, &listen_id, NULL, RDMA_PS_TCP); - if (ret) { - ERROR(errp, "could not create cm_id!"); + if (ret < 0) { + error_setg(errp, "RDMA ERROR: could not create cm_id!"); goto err_dest_init_create_listen_id; } @@ -2692,37 +2661,48 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp) port_str[15] = '\0'; ret = rdma_getaddrinfo(rdma->host, port_str, NULL, &res); - if (ret < 0) { - ERROR(errp, "could not rdma_getaddrinfo address %s", rdma->host); + if (ret) { + error_setg(errp, "RDMA ERROR: could not rdma_getaddrinfo address %s", + rdma->host); goto err_dest_init_bind_addr; } ret = rdma_set_option(listen_id, RDMA_OPTION_ID, RDMA_OPTION_ID_REUSEADDR, &reuse, sizeof reuse); - if (ret) { - ERROR(errp, "Error: could not set REUSEADDR option"); + if (ret < 0) { + error_setg(errp, "RDMA ERROR: Error: could not set REUSEADDR option"); goto err_dest_init_bind_addr; } + + /* Try all addresses, saving the first error in @err */ for (e = res; e != NULL; e = e->ai_next) { + Error **local_errp = err ? NULL : &err; + inet_ntop(e->ai_family, &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip); trace_qemu_rdma_dest_init_trying(rdma->host, ip); ret = rdma_bind_addr(listen_id, e->ai_dst_addr); - if (ret) { + if (ret < 0) { continue; } if (e->ai_family == AF_INET6) { - ret = qemu_rdma_broken_ipv6_kernel(listen_id->verbs, errp); - if (ret) { + ret = qemu_rdma_broken_ipv6_kernel(listen_id->verbs, + local_errp); + if (ret < 0) { continue; } } + error_free(err); break; } rdma_freeaddrinfo(res); if (!e) { - ERROR(errp, "Error: could not rdma_bind_addr!"); + if (err) { + error_propagate(errp, err); + } else { + error_setg(errp, "RDMA ERROR: Error: could not rdma_bind_addr!"); + } goto err_dest_init_bind_addr; } @@ -2735,8 +2715,8 @@ err_dest_init_bind_addr: err_dest_init_create_listen_id: rdma_destroy_event_channel(rdma->channel); rdma->channel = NULL; - rdma->error_state = ret; - return ret; + rdma->errored = true; + return -1; } @@ -2759,30 +2739,28 @@ static void qemu_rdma_return_path_dest_init(RDMAContext *rdma_return_path, rdma_return_path->is_return_path = true; } -static void *qemu_rdma_data_init(const char *host_port, Error **errp) +static RDMAContext *qemu_rdma_data_init(const char *host_port, Error **errp) { RDMAContext *rdma = NULL; InetSocketAddress *addr; - if (host_port) { - rdma = g_new0(RDMAContext, 1); - rdma->current_index = -1; - rdma->current_chunk = -1; - - addr = g_new(InetSocketAddress, 1); - if (!inet_parse(addr, host_port, NULL)) { - rdma->port = atoi(addr->port); - rdma->host = g_strdup(addr->host); - rdma->host_port = g_strdup(host_port); - } else { - ERROR(errp, "bad RDMA migration address '%s'", host_port); - g_free(rdma); - rdma = NULL; - } + rdma = g_new0(RDMAContext, 1); + rdma->current_index = -1; + rdma->current_chunk = -1; - qapi_free_InetSocketAddress(addr); + addr = g_new(InetSocketAddress, 1); + if (!inet_parse(addr, host_port, NULL)) { + rdma->port = atoi(addr->port); + rdma->host = g_strdup(addr->host); + rdma->host_port = g_strdup(host_port); + } else { + error_setg(errp, "RDMA ERROR: bad RDMA migration address '%s'", + host_port); + g_free(rdma); + rdma = NULL; } + qapi_free_InetSocketAddress(addr); return rdma; } @@ -2803,8 +2781,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, RDMAContext *rdma; int ret; ssize_t done = 0; - size_t i; - size_t len = 0; + size_t i, len; RCU_READ_LOCK_GUARD(); rdma = qatomic_rcu_read(&rioc->rdmaout); @@ -2814,16 +2791,19 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, return -1; } - CHECK_ERROR_STATE(); + if (rdma->errored) { + error_setg(errp, + "RDMA is in an error state waiting migration to abort!"); + return -1; + } /* * Push out any writes that * we're queued up for VM's ram. */ - ret = qemu_rdma_write_flush(rdma); + ret = qemu_rdma_write_flush(rdma, errp); if (ret < 0) { - rdma->error_state = ret; - error_setg(errp, "qemu_rdma_write_flush returned %d", ret); + rdma->errored = true; return -1; } @@ -2839,11 +2819,11 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, head.len = len; head.type = RDMA_CONTROL_QEMU_FILE; - ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL); + ret = qemu_rdma_exchange_send(rdma, &head, + data, NULL, NULL, NULL, errp); if (ret < 0) { - rdma->error_state = ret; - error_setg(errp, "qemu_rdma_exchange_send returned %d", ret); + rdma->errored = true; return -1; } @@ -2888,9 +2868,9 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); RDMAContext *rdma; RDMAControlHeader head; - int ret = 0; - ssize_t i; - size_t done = 0; + int ret; + ssize_t done = 0; + size_t i, len; RCU_READ_LOCK_GUARD(); rdma = qatomic_rcu_read(&rioc->rdmain); @@ -2900,7 +2880,11 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, return -1; } - CHECK_ERROR_STATE(); + if (rdma->errored) { + error_setg(errp, + "RDMA is in an error state waiting migration to abort!"); + return -1; + } for (i = 0; i < niov; i++) { size_t want = iov[i].iov_len; @@ -2911,9 +2895,9 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, * were given and dish out the bytes until we run * out of bytes. */ - ret = qemu_rdma_fill(rdma, data, want, 0); - done += ret; - want -= ret; + len = qemu_rdma_fill(rdma, data, want, 0); + done += len; + want -= len; /* Got what we needed, so go to next iovec */ if (want == 0) { continue; @@ -2929,20 +2913,20 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, /* We've got nothing at all, so lets wait for * more to arrive */ - ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE); + ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE, + errp); if (ret < 0) { - rdma->error_state = ret; - error_setg(errp, "qemu_rdma_exchange_recv returned %d", ret); + rdma->errored = true; return -1; } /* * SEND was received with new bytes, now try again. */ - ret = qemu_rdma_fill(rdma, data, want, 0); - done += ret; - want -= ret; + len = qemu_rdma_fill(rdma, data, want, 0); + done += len; + want -= len; /* Still didn't get enough, so lets just return */ if (want) { @@ -2961,17 +2945,19 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, */ static int qemu_rdma_drain_cq(RDMAContext *rdma) { + Error *err = NULL; int ret; - if (qemu_rdma_write_flush(rdma) < 0) { - return -EIO; + if (qemu_rdma_write_flush(rdma, &err) < 0) { + error_report_err(err); + return -1; } while (rdma->nb_sent) { ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL); if (ret < 0) { error_report("rdma migration: complete polling error!"); - return -EIO; + return -1; } } @@ -3095,7 +3081,7 @@ qio_channel_rdma_source_finalize(GSource *source) object_unref(OBJECT(ssource->rioc)); } -GSourceFuncs qio_channel_rdma_source_funcs = { +static GSourceFuncs qio_channel_rdma_source_funcs = { qio_channel_rdma_source_prepare, qio_channel_rdma_source_check, qio_channel_rdma_source_dispatch, @@ -3206,21 +3192,21 @@ qio_channel_rdma_shutdown(QIOChannel *ioc, switch (how) { case QIO_CHANNEL_SHUTDOWN_READ: if (rdmain) { - rdmain->error_state = -1; + rdmain->errored = true; } break; case QIO_CHANNEL_SHUTDOWN_WRITE: if (rdmaout) { - rdmaout->error_state = -1; + rdmaout->errored = true; } break; case QIO_CHANNEL_SHUTDOWN_BOTH: default: if (rdmain) { - rdmain->error_state = -1; + rdmain->errored = true; } if (rdmaout) { - rdmaout->error_state = -1; + rdmaout->errored = true; } break; } @@ -3250,6 +3236,7 @@ static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, size_t size) { QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f)); + Error *err = NULL; RDMAContext *rdma; int ret; @@ -3261,10 +3248,12 @@ static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset, rdma = qatomic_rcu_read(&rioc->rdmaout); if (!rdma) { - return -EIO; + return -1; } - CHECK_ERROR_STATE(); + if (rdma_errored(rdma)) { + return -1; + } qemu_fflush(f); @@ -3273,9 +3262,9 @@ static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset, * is full, or the page doesn't belong to the current chunk, * an actual RDMA write will occur and a new chunk will be formed. */ - ret = qemu_rdma_write(rdma, block_offset, offset, size); + ret = qemu_rdma_write(rdma, block_offset, offset, size, &err); if (ret < 0) { - error_report("rdma migration: write error! %d", ret); + error_report_err(err); goto err; } @@ -3291,7 +3280,7 @@ static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset, ret = qemu_rdma_poll(rdma, rdma->recv_cq, &wr_id_in, NULL); if (ret < 0) { - error_report("rdma migration: polling error! %d", ret); + error_report("rdma migration: polling error"); goto err; } @@ -3307,7 +3296,7 @@ static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset, ret = qemu_rdma_poll(rdma, rdma->send_cq, &wr_id_in, NULL); if (ret < 0) { - error_report("rdma migration: polling error! %d", ret); + error_report("rdma migration: polling error"); goto err; } @@ -3319,9 +3308,10 @@ static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset, } return RAM_SAVE_CONTROL_DELAYED; + err: - rdma->error_state = ret; - return ret; + rdma->errored = true; + return -1; } static void rdma_accept_incoming_migration(void *opaque); @@ -3334,20 +3324,20 @@ static void rdma_cm_poll_handler(void *opaque) MigrationIncomingState *mis = migration_incoming_get_current(); ret = rdma_get_cm_event(rdma->channel, &cm_event); - if (ret) { + if (ret < 0) { error_report("get_cm_event failed %d", errno); return; } if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { - if (!rdma->error_state && + if (!rdma->errored && migration_incoming_get_current()->state != MIGRATION_STATUS_COMPLETED) { error_report("receive cm event, cm event is %d", cm_event->event); - rdma->error_state = -EPIPE; + rdma->errored = true; if (rdma->return_path) { - rdma->return_path->error_state = -EPIPE; + rdma->return_path->errored = true; } } rdma_ack_cm_event(cm_event); @@ -3361,6 +3351,7 @@ static void rdma_cm_poll_handler(void *opaque) static int qemu_rdma_accept(RDMAContext *rdma) { + Error *err = NULL; RDMACapabilities cap; struct rdma_conn_param conn_param = { .responder_resources = 2, @@ -3370,11 +3361,11 @@ static int qemu_rdma_accept(RDMAContext *rdma) RDMAContext *rdma_return_path = NULL; struct rdma_cm_event *cm_event; struct ibv_context *verbs; - int ret = -EINVAL; + int ret; int idx; ret = rdma_get_cm_event(rdma->channel, &cm_event); - if (ret) { + if (ret < 0) { goto err_rdma_dest_wait; } @@ -3403,10 +3394,10 @@ static int qemu_rdma_accept(RDMAContext *rdma) network_to_caps(&cap); if (cap.version < 1 || cap.version > RDMA_CONTROL_VERSION_CURRENT) { - error_report("Unknown source RDMA version: %d, bailing...", - cap.version); - rdma_ack_cm_event(cm_event); - goto err_rdma_dest_wait; + error_report("Unknown source RDMA version: %d, bailing...", + cap.version); + rdma_ack_cm_event(cm_event); + goto err_rdma_dest_wait; } /* @@ -3436,34 +3427,30 @@ static int qemu_rdma_accept(RDMAContext *rdma) if (!rdma->verbs) { rdma->verbs = verbs; } else if (rdma->verbs != verbs) { - error_report("ibv context not matching %p, %p!", rdma->verbs, - verbs); - goto err_rdma_dest_wait; + error_report("ibv context not matching %p, %p!", rdma->verbs, + verbs); + goto err_rdma_dest_wait; } qemu_rdma_dump_id("dest_init", verbs); - ret = qemu_rdma_alloc_pd_cq(rdma); - if (ret) { - error_report("rdma migration: error allocating pd and cq!"); + ret = qemu_rdma_alloc_pd_cq(rdma, &err); + if (ret < 0) { + error_report_err(err); goto err_rdma_dest_wait; } ret = qemu_rdma_alloc_qp(rdma); - if (ret) { + if (ret < 0) { error_report("rdma migration: error allocating qp!"); goto err_rdma_dest_wait; } - ret = qemu_rdma_init_ram_blocks(rdma); - if (ret) { - error_report("rdma migration: error initializing ram blocks!"); - goto err_rdma_dest_wait; - } + qemu_rdma_init_ram_blocks(rdma); for (idx = 0; idx < RDMA_WRID_MAX; idx++) { ret = qemu_rdma_reg_control(rdma, idx); - if (ret) { + if (ret < 0) { error_report("rdma: error registering %d control", idx); goto err_rdma_dest_wait; } @@ -3481,14 +3468,14 @@ static int qemu_rdma_accept(RDMAContext *rdma) } ret = rdma_accept(rdma->cm_id, &conn_param); - if (ret) { - error_report("rdma_accept returns %d", ret); + if (ret < 0) { + error_report("rdma_accept failed"); goto err_rdma_dest_wait; } ret = rdma_get_cm_event(rdma->channel, &cm_event); - if (ret) { - error_report("rdma_accept get_cm_event failed %d", ret); + if (ret < 0) { + error_report("rdma_accept get_cm_event failed"); goto err_rdma_dest_wait; } @@ -3501,9 +3488,9 @@ static int qemu_rdma_accept(RDMAContext *rdma) rdma_ack_cm_event(cm_event); rdma->connected = true; - ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY); - if (ret) { - error_report("rdma migration: error posting second control recv"); + ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY, &err); + if (ret < 0) { + error_report_err(err); goto err_rdma_dest_wait; } @@ -3512,10 +3499,10 @@ static int qemu_rdma_accept(RDMAContext *rdma) return 0; err_rdma_dest_wait: - rdma->error_state = ret; + rdma->errored = true; qemu_rdma_cleanup(rdma); g_free(rdma_return_path); - return ret; + return -1; } static int dest_ram_sort_func(const void *a, const void *b) @@ -3548,6 +3535,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) RDMAControlHeader blocks = { .type = RDMA_CONTROL_RAM_BLOCKS_RESULT, .repeat = 1 }; QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f)); + Error *err = NULL; RDMAContext *rdma; RDMALocalBlocks *local; RDMAControlHeader head; @@ -3557,7 +3545,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) static RDMARegisterResult results[RDMA_CONTROL_MAX_COMMANDS_PER_MESSAGE]; RDMALocalBlock *block; void *host_addr; - int ret = 0; + int ret; int idx = 0; int count = 0; int i = 0; @@ -3566,25 +3554,27 @@ static int qemu_rdma_registration_handle(QEMUFile *f) rdma = qatomic_rcu_read(&rioc->rdmain); if (!rdma) { - return -EIO; + return -1; } - CHECK_ERROR_STATE(); + if (rdma_errored(rdma)) { + return -1; + } local = &rdma->local_ram_blocks; do { trace_qemu_rdma_registration_handle_wait(); - ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_NONE); + ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_NONE, &err); if (ret < 0) { + error_report_err(err); break; } if (head.repeat > RDMA_CONTROL_MAX_COMMANDS_PER_MESSAGE) { error_report("rdma: Too many requests in this message (%d)." "Bailing.", head.repeat); - ret = -EIO; break; } @@ -3600,8 +3590,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) error_report("rdma: 'compress' bad block index %u (vs %d)", (unsigned int)comp->block_idx, rdma->local_ram_blocks.nb_blocks); - ret = -EIO; - goto out; + goto err; } block = &(rdma->local_ram_blocks.block[comp->block_idx]); @@ -3613,7 +3602,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) case RDMA_CONTROL_REGISTER_FINISHED: trace_qemu_rdma_registration_handle_finished(); - goto out; + return 0; case RDMA_CONTROL_RAM_BLOCKS_REQUEST: trace_qemu_rdma_registration_handle_ram_blocks(); @@ -3630,11 +3619,10 @@ static int qemu_rdma_registration_handle(QEMUFile *f) } if (rdma->pin_all) { - ret = qemu_rdma_reg_whole_ram_blocks(rdma); - if (ret) { - error_report("rdma migration: error dest " - "registering ram blocks"); - goto out; + ret = qemu_rdma_reg_whole_ram_blocks(rdma, &err); + if (ret < 0) { + error_report_err(err); + goto err; } } @@ -3669,11 +3657,12 @@ static int qemu_rdma_registration_handle(QEMUFile *f) ret = qemu_rdma_post_send_control(rdma, - (uint8_t *) rdma->dest_blocks, &blocks); + (uint8_t *) rdma->dest_blocks, &blocks, + &err); if (ret < 0) { - error_report("rdma migration: error sending remote info"); - goto out; + error_report_err(err); + goto err; } break; @@ -3699,8 +3688,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) error_report("rdma: 'register' bad block index %u (vs %d)", (unsigned int)reg->current_index, rdma->local_ram_blocks.nb_blocks); - ret = -ENOENT; - goto out; + goto err; } block = &(rdma->local_ram_blocks.block[reg->current_index]); if (block->is_ram_block) { @@ -3709,8 +3697,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) " offset: %" PRIx64 " current_addr: %" PRIx64, block->block_name, block->offset, reg->key.current_addr); - ret = -ERANGE; - goto out; + goto err; } host_addr = (block->local_host_addr + (reg->key.current_addr - block->offset)); @@ -3725,8 +3712,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) error_report("rdma: bad chunk for block %s" " chunk: %" PRIx64, block->block_name, reg->key.chunk); - ret = -ERANGE; - goto out; + goto err; } } chunk_start = ram_chunk_start(block, chunk); @@ -3737,8 +3723,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) (uintptr_t)host_addr, NULL, &tmp_rkey, chunk, chunk_start, chunk_end)) { error_report("cannot get rkey"); - ret = -EINVAL; - goto out; + goto err; } reg_result->rkey = tmp_rkey; @@ -3751,11 +3736,11 @@ static int qemu_rdma_registration_handle(QEMUFile *f) } ret = qemu_rdma_post_send_control(rdma, - (uint8_t *) results, ®_resp); + (uint8_t *) results, ®_resp, &err); if (ret < 0) { - error_report("Failed to send control buffer"); - goto out; + error_report_err(err); + goto err; } break; case RDMA_CONTROL_UNREGISTER_REQUEST: @@ -3776,9 +3761,9 @@ static int qemu_rdma_registration_handle(QEMUFile *f) block->pmr[reg->key.chunk] = NULL; if (ret != 0) { - perror("rdma unregistration chunk failed"); - ret = -ret; - goto out; + error_report("rdma unregistration chunk failed: %s", + strerror(errno)); + goto err; } rdma->total_registrations--; @@ -3787,28 +3772,25 @@ static int qemu_rdma_registration_handle(QEMUFile *f) reg->key.chunk); } - ret = qemu_rdma_post_send_control(rdma, NULL, &unreg_resp); + ret = qemu_rdma_post_send_control(rdma, NULL, &unreg_resp, &err); if (ret < 0) { - error_report("Failed to send control buffer"); - goto out; + error_report_err(err); + goto err; } break; case RDMA_CONTROL_REGISTER_RESULT: error_report("Invalid RESULT message at dest."); - ret = -EIO; - goto out; + goto err; default: error_report("Unknown control message %s", control_desc(head.type)); - ret = -EIO; - goto out; + goto err; } } while (1); -out: - if (ret < 0) { - rdma->error_state = ret; - } - return ret; + +err: + rdma->errored = true; + return -1; } /* Destination: @@ -3830,7 +3812,7 @@ rdma_block_notification_handle(QEMUFile *f, const char *name) rdma = qatomic_rcu_read(&rioc->rdmain); if (!rdma) { - return -EIO; + return -1; } /* Find the matching RAMBlock in our local list */ @@ -3843,7 +3825,7 @@ rdma_block_notification_handle(QEMUFile *f, const char *name) if (found == -1) { error_report("RAMBlock '%s' not found on destination", name); - return -ENOENT; + return -1; } rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index; @@ -3881,10 +3863,12 @@ static int qemu_rdma_registration_start(QEMUFile *f, RCU_READ_LOCK_GUARD(); rdma = qatomic_rcu_read(&rioc->rdmaout); if (!rdma) { - return -EIO; + return -1; } - CHECK_ERROR_STATE(); + if (rdma_errored(rdma)) { + return -1; + } trace_qemu_rdma_registration_start(flags); qemu_put_be64(f, RAM_SAVE_FLAG_HOOK); @@ -3901,9 +3885,10 @@ static int qemu_rdma_registration_stop(QEMUFile *f, uint64_t flags, void *data) { QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f)); + Error *err = NULL; RDMAContext *rdma; RDMAControlHeader head = { .len = 0, .repeat = 1 }; - int ret = 0; + int ret; if (migration_in_postcopy()) { return 0; @@ -3912,10 +3897,12 @@ static int qemu_rdma_registration_stop(QEMUFile *f, RCU_READ_LOCK_GUARD(); rdma = qatomic_rcu_read(&rioc->rdmaout); if (!rdma) { - return -EIO; + return -1; } - CHECK_ERROR_STATE(); + if (rdma_errored(rdma)) { + return -1; + } qemu_fflush(f); ret = qemu_rdma_drain_cq(rdma); @@ -3942,10 +3929,11 @@ static int qemu_rdma_registration_stop(QEMUFile *f, */ ret = qemu_rdma_exchange_send(rdma, &head, NULL, &resp, ®_result_idx, rdma->pin_all ? - qemu_rdma_reg_whole_ram_blocks : NULL); + qemu_rdma_reg_whole_ram_blocks : NULL, + &err); if (ret < 0) { - fprintf(stderr, "receiving remote info!"); - return ret; + error_report_err(err); + return -1; } nb_dest_blocks = resp.len / sizeof(RDMADestBlock); @@ -3963,12 +3951,12 @@ static int qemu_rdma_registration_stop(QEMUFile *f, */ if (local->nb_blocks != nb_dest_blocks) { - fprintf(stderr, "ram blocks mismatch (Number of blocks %d vs %d) " - "Your QEMU command line parameters are probably " - "not identical on both the source and destination.", - local->nb_blocks, nb_dest_blocks); - rdma->error_state = -EINVAL; - return -EINVAL; + error_report("ram blocks mismatch (Number of blocks %d vs %d)", + local->nb_blocks, nb_dest_blocks); + error_printf("Your QEMU command line parameters are probably " + "not identical on both the source and destination."); + rdma->errored = true; + return -1; } qemu_rdma_move_header(rdma, reg_result_idx, &resp); @@ -3979,12 +3967,13 @@ static int qemu_rdma_registration_stop(QEMUFile *f, /* We require that the blocks are in the same order */ if (rdma->dest_blocks[i].length != local->block[i].length) { - fprintf(stderr, "Block %s/%d has a different length %" PRIu64 - "vs %" PRIu64, local->block[i].block_name, i, - local->block[i].length, - rdma->dest_blocks[i].length); - rdma->error_state = -EINVAL; - return -EINVAL; + error_report("Block %s/%d has a different length %" PRIu64 + "vs %" PRIu64, + local->block[i].block_name, i, + local->block[i].length, + rdma->dest_blocks[i].length); + rdma->errored = true; + return -1; } local->block[i].remote_host_addr = rdma->dest_blocks[i].remote_host_addr; @@ -3995,16 +3984,17 @@ static int qemu_rdma_registration_stop(QEMUFile *f, trace_qemu_rdma_registration_stop(flags); head.type = RDMA_CONTROL_REGISTER_FINISHED; - ret = qemu_rdma_exchange_send(rdma, &head, NULL, NULL, NULL, NULL); + ret = qemu_rdma_exchange_send(rdma, &head, NULL, NULL, NULL, NULL, &err); if (ret < 0) { + error_report_err(err); goto err; } return 0; err: - rdma->error_state = ret; - return ret; + rdma->errored = true; + return -1; } static const QEMUFileHooks rdma_read_hooks = { @@ -4096,8 +4086,8 @@ static void rdma_accept_incoming_migration(void *opaque) trace_qemu_rdma_accept_incoming_migration(); ret = qemu_rdma_accept(rdma); - if (ret) { - fprintf(stderr, "RDMA ERROR: Migration initialization failed\n"); + if (ret < 0) { + error_report("RDMA ERROR: Migration initialization failed"); return; } @@ -4109,7 +4099,7 @@ static void rdma_accept_incoming_migration(void *opaque) f = rdma_new_input(rdma); if (f == NULL) { - fprintf(stderr, "RDMA ERROR: could not open RDMA for input\n"); + error_report("RDMA ERROR: could not open RDMA for input"); qemu_rdma_cleanup(rdma); return; } @@ -4125,7 +4115,6 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) { int ret; RDMAContext *rdma; - Error *local_err = NULL; trace_rdma_start_incoming_migration(); @@ -4135,14 +4124,13 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) return; } - rdma = qemu_rdma_data_init(host_port, &local_err); + rdma = qemu_rdma_data_init(host_port, errp); if (rdma == NULL) { goto err; } - ret = qemu_rdma_dest_init(rdma, &local_err); - - if (ret) { + ret = qemu_rdma_dest_init(rdma, errp); + if (ret < 0) { goto err; } @@ -4150,8 +4138,8 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) ret = rdma_listen(rdma->listen_id, 5); - if (ret) { - ERROR(errp, "listening on socket!"); + if (ret < 0) { + error_setg(errp, "RDMA ERROR: listening on socket!"); goto cleanup_rdma; } @@ -4164,7 +4152,6 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) cleanup_rdma: qemu_rdma_cleanup(rdma); err: - error_propagate(errp, local_err); if (rdma) { g_free(rdma->host); g_free(rdma->host_port); @@ -4178,7 +4165,7 @@ void rdma_start_outgoing_migration(void *opaque, MigrationState *s = opaque; RDMAContext *rdma_return_path = NULL; RDMAContext *rdma; - int ret = 0; + int ret; /* Avoid ram_block_discard_disable(), cannot change during migration. */ if (ram_block_discard_is_required()) { @@ -4193,14 +4180,14 @@ void rdma_start_outgoing_migration(void *opaque, ret = qemu_rdma_source_init(rdma, migrate_rdma_pin_all(), errp); - if (ret) { + if (ret < 0) { goto err; } trace_rdma_start_outgoing_migration_after_rdma_source_init(); - ret = qemu_rdma_connect(rdma, errp, false); + ret = qemu_rdma_connect(rdma, false, errp); - if (ret) { + if (ret < 0) { goto err; } @@ -4215,13 +4202,13 @@ void rdma_start_outgoing_migration(void *opaque, ret = qemu_rdma_source_init(rdma_return_path, migrate_rdma_pin_all(), errp); - if (ret) { + if (ret < 0) { goto return_path_err; } - ret = qemu_rdma_connect(rdma_return_path, errp, true); + ret = qemu_rdma_connect(rdma_return_path, true, errp); - if (ret) { + if (ret < 0) { goto return_path_err; } diff --git a/migration/savevm.c b/migration/savevm.c index 60eec7c31f..497ce02bd7 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2734,7 +2734,8 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex); } - migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, + /* Current state can be either ACTIVE or RECOVER */ + migrate_set_state(&mis->state, mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED); /* Notify the fault thread for the invalidated file handle */ diff --git a/migration/trace-events b/migration/trace-events index 002abe3a4e..ee9c8f4d63 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -208,12 +208,14 @@ qemu_rdma_accept_incoming_migration(void) "" qemu_rdma_accept_incoming_migration_accepted(void) "" qemu_rdma_accept_pin_state(bool pin) "%d" qemu_rdma_accept_pin_verbsc(void *verbs) "Verbs context after listen: %p" -qemu_rdma_block_for_wrid_miss(const char *wcompstr, int wcomp, const char *gcompstr, uint64_t req) "A Wanted wrid %s (%d) but got %s (%" PRIu64 ")" +qemu_rdma_block_for_wrid_miss(uint64_t wcomp, uint64_t req) "A Wanted wrid %" PRIu64 " but got %" PRIu64 qemu_rdma_cleanup_disconnect(void) "" qemu_rdma_close(void) "" qemu_rdma_connect_pin_all_requested(void) "" qemu_rdma_connect_pin_all_outcome(bool pin) "%d" qemu_rdma_dest_init_trying(const char *host, const char *ip) "%s => %s" +qemu_rdma_dump_id_failed(const char *who) "%s RDMA Device opened, but can't query port information" +qemu_rdma_dump_id(const char *who, const char *name, const char *dev_name, const char *dev_path, const char *ibdev_path, int transport, const char *transport_name) "%s RDMA Device opened: kernel name %s uverbs device name %s, infiniband_verbs class device path %s, infiniband class device path %s, transport: (%d) %s" qemu_rdma_dump_gid(const char *who, const char *src, const char *dst) "%s Source GID: %s, Dest GID: %s" qemu_rdma_exchange_get_response_start(const char *desc) "CONTROL: %s receiving..." qemu_rdma_exchange_get_response_none(const char *desc, int type) "Surprise: got %s (%d)" @@ -222,9 +224,9 @@ qemu_rdma_exchange_send_waiting(const char *desc) "Waiting for response %s" qemu_rdma_exchange_send_received(const char *desc) "Response %s received." qemu_rdma_fill(size_t control_len, size_t size) "RDMA %zd of %zd bytes already in buffer" qemu_rdma_init_ram_blocks(int blocks) "Allocated %d local ram block structures" -qemu_rdma_poll_recv(const char *compstr, int64_t comp, int64_t id, int sent) "completion %s #%" PRId64 " received (%" PRId64 ") left %d" -qemu_rdma_poll_write(const char *compstr, int64_t comp, int left, uint64_t block, uint64_t chunk, void *local, void *remote) "completions %s (%" PRId64 ") left %d, block %" PRIu64 ", chunk: %" PRIu64 " %p %p" -qemu_rdma_poll_other(const char *compstr, int64_t comp, int left) "other completion %s (%" PRId64 ") received left %d" +qemu_rdma_poll_recv(uint64_t comp, int64_t id, int sent) "completion %" PRIu64 " received (%" PRId64 ") left %d" +qemu_rdma_poll_write(uint64_t comp, int left, uint64_t block, uint64_t chunk, void *local, void *remote) "completions %" PRIu64 " left %d, block %" PRIu64 ", chunk: %" PRIu64 " %p %p" +qemu_rdma_poll_other(uint64_t comp, int left) "other completion %" PRIu64 " received left %d" qemu_rdma_post_send_control(const char *desc) "CONTROL: sending %s.." qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering %" PRIu64 " bytes @ %p" qemu_rdma_register_odp_mr(const char *name) "Try to register On-Demand Paging memory region: %s" diff --git a/qapi/migration.json b/qapi/migration.json index d8f3bbd7b0..d7dfaa5db9 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -230,9 +230,8 @@ # throttled during auto-converge. This is only present when # auto-converge has started throttling guest cpus. (Since 2.7) # -# @error-desc: the human readable error description string, when -# @status is 'failed'. Clients should not attempt to parse the -# error strings. (Since 2.7) +# @error-desc: the human readable error description string. Clients +# should not attempt to parse the error strings. (Since 2.7) # # @postcopy-blocktime: total time when all vCPU were blocked during # postcopy live migration. This is only present when the diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c index 3f94a4f477..dc7a55634c 100644 --- a/tests/qtest/libqtest.c +++ b/tests/qtest/libqtest.c @@ -1259,6 +1259,28 @@ void qtest_memset(QTestState *s, uint64_t addr, uint8_t pattern, size_t size) qtest_rsp(s); } +QDict *qtest_vqmp_assert_failure_ref(QTestState *qts, + const char *fmt, va_list args) +{ + QDict *response; + QDict *ret; + + response = qtest_vqmp(qts, fmt, args); + + g_assert(response); + if (!qdict_haskey(response, "error")) { + g_autoptr(GString) s = qobject_to_json_pretty(QOBJECT(response), true); + g_test_message("%s", s->str); + } + g_assert(qdict_haskey(response, "error")); + g_assert(!qdict_haskey(response, "return")); + ret = qdict_get_qdict(response, "error"); + qobject_ref(ret); + qobject_unref(response); + + return ret; +} + QDict *qtest_vqmp_assert_success_ref(QTestState *qts, const char *fmt, va_list args) { @@ -1321,6 +1343,17 @@ void qtest_vqmp_fds_assert_success(QTestState *qts, int *fds, size_t nfds, } #endif /* !_WIN32 */ +QDict *qtest_qmp_assert_failure_ref(QTestState *qts, const char *fmt, ...) +{ + QDict *response; + va_list ap; + + va_start(ap, fmt); + response = qtest_vqmp_assert_failure_ref(qts, fmt, ap); + va_end(ap); + return response; +} + QDict *qtest_qmp_assert_success_ref(QTestState *qts, const char *fmt, ...) { QDict *response; diff --git a/tests/qtest/libqtest.h b/tests/qtest/libqtest.h index e53e350e3a..5fe3d13466 100644 --- a/tests/qtest/libqtest.h +++ b/tests/qtest/libqtest.h @@ -811,6 +811,34 @@ void qtest_vqmp_fds_assert_success(QTestState *qts, int *fds, size_t nfds, #endif /* !_WIN32 */ /** + * qtest_qmp_assert_failure_ref: + * @qts: QTestState instance to operate on + * @fmt: QMP message to send to qemu, formatted like + * qobject_from_jsonf_nofail(). See parse_interpolation() for what's + * supported after '%'. + * + * Sends a QMP message to QEMU, asserts that an 'error' key is present in + * the response, and returns the response. + */ +QDict *qtest_qmp_assert_failure_ref(QTestState *qts, const char *fmt, ...) + G_GNUC_PRINTF(2, 3); + +/** + * qtest_vqmp_assert_failure_ref: + * @qts: QTestState instance to operate on + * @fmt: QMP message to send to qemu, formatted like + * qobject_from_jsonf_nofail(). See parse_interpolation() for what's + * supported after '%'. + * @args: variable arguments for @fmt + * + * Sends a QMP message to QEMU, asserts that an 'error' key is present in + * the response, and returns the response. + */ +QDict *qtest_vqmp_assert_failure_ref(QTestState *qts, + const char *fmt, va_list args) + G_GNUC_PRINTF(2, 0); + +/** * qtest_qmp_assert_success_ref: * @qts: QTestState instance to operate on * @fmt: QMP message to send to qemu, formatted like diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 1fba07f4ed..66795cfcd2 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -317,6 +317,7 @@ qtests = { 'tpm-tis-i2c-test': [io, tpmemu_files, 'qtest_aspeed.c'], 'tpm-tis-device-swtpm-test': [io, tpmemu_files, 'tpm-tis-util.c'], 'tpm-tis-device-test': [io, tpmemu_files, 'tpm-tis-util.c'], + 'virtio-net-failover': files('migration-helpers.c'), 'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'), 'netdev-socket': files('netdev-socket.c', '../unit/socket-helpers.c'), } diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c index be00c52d00..0c185db450 100644 --- a/tests/qtest/migration-helpers.c +++ b/tests/qtest/migration-helpers.c @@ -49,6 +49,26 @@ bool migrate_watch_for_resume(QTestState *who, const char *name, return false; } +void migrate_qmp_fail(QTestState *who, const char *uri, const char *fmt, ...) +{ + va_list ap; + QDict *args, *err; + + va_start(ap, fmt); + args = qdict_from_vjsonf_nofail(fmt, ap); + va_end(ap); + + g_assert(!qdict_haskey(args, "uri")); + qdict_put_str(args, "uri", uri); + + err = qtest_qmp_assert_failure_ref( + who, "{ 'execute': 'migrate', 'arguments': %p}", args); + + g_assert(qdict_haskey(err, "desc")); + + qobject_unref(err); +} + /* * Send QMP command "migrate". * Arguments are built from @fmt... (formatted like @@ -70,6 +90,46 @@ void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...) "{ 'execute': 'migrate', 'arguments': %p}", args); } +void migrate_set_capability(QTestState *who, const char *capability, + bool value) +{ + qtest_qmp_assert_success(who, + "{ 'execute': 'migrate-set-capabilities'," + "'arguments': { " + "'capabilities': [ { " + "'capability': %s, 'state': %i } ] } }", + capability, value); +} + +void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...) +{ + va_list ap; + QDict *args, *rsp, *data; + + va_start(ap, fmt); + args = qdict_from_vjsonf_nofail(fmt, ap); + va_end(ap); + + g_assert(!qdict_haskey(args, "uri")); + qdict_put_str(args, "uri", uri); + + migrate_set_capability(to, "events", true); + + rsp = qtest_qmp(to, "{ 'execute': 'migrate-incoming', 'arguments': %p}", + args); + g_assert(qdict_haskey(rsp, "return")); + qobject_unref(rsp); + + rsp = qtest_qmp_eventwait_ref(to, "MIGRATION"); + g_assert(qdict_haskey(rsp, "data")); + + data = qdict_get_qdict(rsp, "data"); + g_assert(qdict_haskey(data, "status")); + g_assert_cmpstr(qdict_get_str(data, "status"), ==, "setup"); + + qobject_unref(rsp); +} + /* * Note: caller is responsible to free the returned object via * qobject_unref() after use diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h index 009e250e90..4f51d0f8bc 100644 --- a/tests/qtest/migration-helpers.h +++ b/tests/qtest/migration-helpers.h @@ -23,6 +23,16 @@ bool migrate_watch_for_resume(QTestState *who, const char *name, G_GNUC_PRINTF(3, 4) void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...); +G_GNUC_PRINTF(3, 4) +void migrate_incoming_qmp(QTestState *who, const char *uri, + const char *fmt, ...); + +G_GNUC_PRINTF(3, 4) +void migrate_qmp_fail(QTestState *who, const char *uri, const char *fmt, ...); + +void migrate_set_capability(QTestState *who, const char *capability, + bool value); + QDict *migrate_query(QTestState *who); QDict *migrate_query_not_failed(QTestState *who); diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 46f1c275a2..8eb2053dbb 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -605,17 +605,6 @@ static void migrate_cancel(QTestState *who) qtest_qmp_assert_success(who, "{ 'execute': 'migrate_cancel' }"); } -static void migrate_set_capability(QTestState *who, const char *capability, - bool value) -{ - qtest_qmp_assert_success(who, - "{ 'execute': 'migrate-set-capabilities'," - "'arguments': { " - "'capabilities': [ { " - "'capability': %s, 'state': %i } ] } }", - capability, value); -} - static void migrate_postcopy_start(QTestState *from, QTestState *to) { qtest_qmp_assert_success(from, "{ 'execute': 'migrate-start-postcopy' }"); @@ -708,6 +697,8 @@ typedef struct { MIG_TEST_FAIL, /* This test should fail, dest qemu should fail with abnormal status */ MIG_TEST_FAIL_DEST_QUIT_ERR, + /* The QMP command for this migration should fail with an error */ + MIG_TEST_QMP_ERROR, } result; /* @@ -1514,6 +1505,7 @@ static void test_precopy_common(MigrateCommon *args) { QTestState *from, *to; void *data_hook = NULL; + g_autofree char *connect_uri = NULL; if (test_migrate_start(&from, &to, args->listen_uri, &args->start)) { return; @@ -1548,13 +1540,17 @@ static void test_precopy_common(MigrateCommon *args) } if (!args->connect_uri) { - g_autofree char *local_connect_uri = - migrate_get_socket_address(to, "socket-address"); - migrate_qmp(from, local_connect_uri, "{}"); + connect_uri = migrate_get_socket_address(to, "socket-address"); } else { - migrate_qmp(from, args->connect_uri, "{}"); + connect_uri = g_strdup(args->connect_uri); + } + + if (args->result == MIG_TEST_QMP_ERROR) { + migrate_qmp_fail(from, connect_uri, "{}"); + goto finish; } + migrate_qmp(from, connect_uri, "{}"); if (args->result != MIG_TEST_SUCCEED) { bool allow_active = args->result == MIG_TEST_FAIL; @@ -1606,6 +1602,7 @@ static void test_precopy_common(MigrateCommon *args) wait_for_serial("dest_serial"); } +finish: if (args->finish_hook) { args->finish_hook(from, to, data_hook); } @@ -1981,8 +1978,7 @@ static void *test_migrate_fd_start_hook(QTestState *from, close(pair[0]); /* Start incoming migration from the 1st socket */ - qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming'," - " 'arguments': { 'uri': 'fd:fd-mig' }}"); + migrate_incoming_qmp(to, "fd:fd-mig", "{}"); /* Send the 2nd socket to the target */ qtest_qmp_fds_assert_success(from, &pair[1], 1, @@ -2204,8 +2200,7 @@ test_migrate_precopy_tcp_multifd_start_common(QTestState *from, migrate_set_capability(to, "multifd", true); /* Start incoming migration from the 1st socket */ - qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming'," - " 'arguments': { 'uri': 'tcp:127.0.0.1:0' }}"); + migrate_incoming_qmp(to, "tcp:127.0.0.1:0", "{}"); return NULL; } @@ -2458,8 +2453,7 @@ static void test_multifd_tcp_cancel(void) migrate_set_capability(to, "multifd", true); /* Start incoming migration from the 1st socket */ - qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming'," - " 'arguments': { 'uri': 'tcp:127.0.0.1:0' }}"); + migrate_incoming_qmp(to, "tcp:127.0.0.1:0", "{}"); /* Wait for the first serial output from the source */ wait_for_serial("src_serial"); @@ -2489,8 +2483,7 @@ static void test_multifd_tcp_cancel(void) migrate_set_capability(to2, "multifd", true); /* Start incoming migration from the 1st socket */ - qtest_qmp_assert_success(to2, "{ 'execute': 'migrate-incoming'," - " 'arguments': { 'uri': 'tcp:127.0.0.1:0' }}"); + migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", "{}"); g_free(uri); uri = migrate_get_socket_address(to2, "socket-address"); diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c index 4a809590bf..0d40bc1f2d 100644 --- a/tests/qtest/virtio-net-failover.c +++ b/tests/qtest/virtio-net-failover.c @@ -11,6 +11,7 @@ #include "libqtest.h" #include "libqos/pci.h" #include "libqos/pci-pc.h" +#include "migration-helpers.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qlist.h" #include "qapi/qmp/qjson.h" @@ -736,26 +737,10 @@ static void test_migrate_out(gconstpointer opaque) machine_stop(qts); } -static QDict *get_migration_event(QTestState *qts) -{ - QDict *resp; - QDict *data; - - resp = qtest_qmp_eventwait_ref(qts, "MIGRATION"); - g_assert(qdict_haskey(resp, "data")); - - data = qdict_get_qdict(resp, "data"); - g_assert(qdict_haskey(data, "status")); - qobject_ref(data); - qobject_unref(resp); - - return data; -} - static void test_migrate_in(gconstpointer opaque) { QTestState *qts; - QDict *resp, *args, *ret; + QDict *resp, *ret; g_autofree gchar *uri = g_strdup_printf("exec: cat %s", (gchar *)opaque); qts = machine_start(BASE_MACHINE @@ -787,18 +772,7 @@ static void test_migrate_in(gconstpointer opaque) check_one_card(qts, true, "standby0", MAC_STANDBY0); check_one_card(qts, false, "primary0", MAC_PRIMARY0); - args = qdict_from_jsonf_nofail("{}"); - g_assert_nonnull(args); - qdict_put_str(args, "uri", uri); - - resp = qtest_qmp(qts, "{ 'execute': 'migrate-incoming', 'arguments': %p}", - args); - g_assert(qdict_haskey(resp, "return")); - qobject_unref(resp); - - resp = get_migration_event(qts); - g_assert_cmpstr(qdict_get_str(resp, "status"), ==, "setup"); - qobject_unref(resp); + migrate_incoming_qmp(qts, uri, "{}"); resp = get_failover_negociated_event(qts); g_assert_cmpstr(qdict_get_str(resp, "device-id"), ==, "standby0"); @@ -888,7 +862,7 @@ static void test_off_migrate_out(gconstpointer opaque) static void test_off_migrate_in(gconstpointer opaque) { QTestState *qts; - QDict *resp, *args, *ret; + QDict *ret; g_autofree gchar *uri = g_strdup_printf("exec: cat %s", (gchar *)opaque); qts = machine_start(BASE_MACHINE @@ -920,18 +894,7 @@ static void test_off_migrate_in(gconstpointer opaque) check_one_card(qts, true, "standby0", MAC_STANDBY0); check_one_card(qts, true, "primary0", MAC_PRIMARY0); - args = qdict_from_jsonf_nofail("{}"); - g_assert_nonnull(args); - qdict_put_str(args, "uri", uri); - - resp = qtest_qmp(qts, "{ 'execute': 'migrate-incoming', 'arguments': %p}", - args); - g_assert(qdict_haskey(resp, "return")); - qobject_unref(resp); - - resp = get_migration_event(qts); - g_assert_cmpstr(qdict_get_str(resp, "status"), ==, "setup"); - qobject_unref(resp); + migrate_incoming_qmp(qts, uri, "{}"); check_one_card(qts, true, "standby0", MAC_STANDBY0); check_one_card(qts, true, "primary0", MAC_PRIMARY0); @@ -1026,7 +989,7 @@ static void test_guest_off_migrate_out(gconstpointer opaque) static void test_guest_off_migrate_in(gconstpointer opaque) { QTestState *qts; - QDict *resp, *args, *ret; + QDict *ret; g_autofree gchar *uri = g_strdup_printf("exec: cat %s", (gchar *)opaque); qts = machine_start(BASE_MACHINE @@ -1058,18 +1021,7 @@ static void test_guest_off_migrate_in(gconstpointer opaque) check_one_card(qts, true, "standby0", MAC_STANDBY0); check_one_card(qts, false, "primary0", MAC_PRIMARY0); - args = qdict_from_jsonf_nofail("{}"); - g_assert_nonnull(args); - qdict_put_str(args, "uri", uri); - - resp = qtest_qmp(qts, "{ 'execute': 'migrate-incoming', 'arguments': %p}", - args); - g_assert(qdict_haskey(resp, "return")); - qobject_unref(resp); - - resp = get_migration_event(qts); - g_assert_cmpstr(qdict_get_str(resp, "status"), ==, "setup"); - qobject_unref(resp); + migrate_incoming_qmp(qts, uri, "{}"); check_one_card(qts, true, "standby0", MAC_STANDBY0); check_one_card(qts, false, "primary0", MAC_PRIMARY0); @@ -1728,7 +1680,7 @@ static void test_multi_out(gconstpointer opaque) static void test_multi_in(gconstpointer opaque) { QTestState *qts; - QDict *resp, *args, *ret; + QDict *resp, *ret; g_autofree gchar *uri = g_strdup_printf("exec: cat %s", (gchar *)opaque); qts = machine_start(BASE_MACHINE @@ -1794,18 +1746,7 @@ static void test_multi_in(gconstpointer opaque) check_one_card(qts, true, "standby1", MAC_STANDBY1); check_one_card(qts, false, "primary1", MAC_PRIMARY1); - args = qdict_from_jsonf_nofail("{}"); - g_assert_nonnull(args); - qdict_put_str(args, "uri", uri); - - resp = qtest_qmp(qts, "{ 'execute': 'migrate-incoming', 'arguments': %p}", - args); - g_assert(qdict_haskey(resp, "return")); - qobject_unref(resp); - - resp = get_migration_event(qts); - g_assert_cmpstr(qdict_get_str(resp, "status"), ==, "setup"); - qobject_unref(resp); + migrate_incoming_qmp(qts, uri, "{}"); resp = get_failover_negociated_event(qts); g_assert_cmpstr(qdict_get_str(resp, "device-id"), ==, "standby0"); |