/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "config-host.h"
#include "qemu-common.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "net/net.h"
#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
#include "qemu/timer.h"
#include "audio/audio.h"
#include "migration/migration.h"
#include "qemu/sockets.h"
#include "qemu/queue.h"
#include "sysemu/cpus.h"
#include "exec/memory.h"
#include "qmp-commands.h"
#include "trace.h"
#include "qemu/iov.h"
#include "block/snapshot.h"
#include "block/qapi.h"

#define SELF_ANNOUNCE_ROUNDS 5

#ifndef ETH_P_RARP
#define ETH_P_RARP 0x8035
#endif
#define ARP_HTYPE_ETH 0x0001
#define ARP_PTYPE_IP 0x0800
#define ARP_OP_REQUEST_REV 0x3

/*
 * Store a 16-bit value at p in network (big-endian) byte order.
 *
 * Byte stores are used instead of a uint16_t pointer cast so that the
 * packet buffer does not need any particular alignment.
 */
static void announce_put_be16(uint8_t *p, uint16_t v)
{
    p[0] = v >> 8;
    p[1] = v & 0xff;
}

/*
 * Build a broadcast RARP request announcing mac_addr into buf.
 *
 * buf must provide room for at least 60 bytes.
 * Returns the length of the frame written (always 60).
 */
static int announce_self_create(uint8_t *buf,
                                uint8_t *mac_addr)
{
    /* Ethernet header. */
    memset(buf, 0xff, 6);         /* destination MAC addr */
    memcpy(buf + 6, mac_addr, 6); /* source MAC addr */
    announce_put_be16(buf + 12, ETH_P_RARP); /* ethertype */

    /* RARP header. */
    announce_put_be16(buf + 14, ARP_HTYPE_ETH); /* hardware addr space */
    announce_put_be16(buf + 16, ARP_PTYPE_IP);  /* protocol addr space */
    *(buf + 18) = 6; /* hardware addr length (ethernet) */
    *(buf + 19) = 4; /* protocol addr length (IPv4) */
    announce_put_be16(buf + 20, ARP_OP_REQUEST_REV); /* opcode */
    memcpy(buf + 22, mac_addr, 6); /* source hw addr */
    memset(buf + 28, 0x00, 4);     /* source protocol addr */
    memcpy(buf + 32, mac_addr, 6); /* target hw addr */
    memset(buf + 38, 0x00, 4);     /* target protocol addr */

    /* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */
    memset(buf + 42, 0x00, 18);

    return 60; /* len (FCS will be added by hardware) */
}

/* qemu_foreach_nic() callback: send one announce frame for the given NIC. */
static void qemu_announce_self_iter(NICState *nic, void *opaque)
{
    uint8_t buf[60];
    int len;

    len = announce_self_create(buf, nic->conf->macaddr.a);

    qemu_send_packet_raw(qemu_get_queue(nic), buf, len);
}

/*
 * Timer callback: announce every NIC once, then either re-arm the timer
 * for the next round or release it after SELF_ANNOUNCE_ROUNDS rounds.
 *
 * opaque points at the QEMUTimer pointer that drives the rounds.
 */
static void qemu_announce_self_once(void *opaque)
{
    static int count = SELF_ANNOUNCE_ROUNDS;
    QEMUTimer *timer = *(QEMUTimer **)opaque;

    qemu_foreach_nic(qemu_announce_self_iter, NULL);

    if (--count) {
        /* delay 50ms, 150ms, 250ms, ... */
        timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
                  50 + (SELF_ANNOUNCE_ROUNDS - count - 1) * 100);
    } else {
        timer_del(timer);
        timer_free(timer);
        /*
         * Re-arm the round counter: without this a later call to
         * qemu_announce_self() would start from 0, decrement to -1 and
         * never reach the terminating branch again.
         */
        count = SELF_ANNOUNCE_ROUNDS;
    }
}

/* Start a sequence of self-announce rounds; the first one is sent now. */
void qemu_announce_self(void)
{
    static QEMUTimer *timer;
    timer = timer_new_ms(QEMU_CLOCK_REALTIME, qemu_announce_self_once, &timer);
    qemu_announce_self_once(&timer);
}

/***********************************************************/
/* savevm/loadvm support */

#define IO_BUF_SIZE 32768
#define MAX_IOV_SIZE MIN(IOV_MAX, 64)

struct QEMUFile {
    const QEMUFileOps *ops;
    void *opaque;

    int64_t bytes_xfer;
    int64_t xfer_limit;

    int64_t pos; /* start of buffer when writing, end of buffer
                    when reading */
    int buf_index;
    int buf_size; /* 0 when writing */
    uint8_t buf[IO_BUF_SIZE];

    struct iovec iov[MAX_IOV_SIZE];
    unsigned int iovcnt;

    int last_error;
};

typedef struct QEMUFileStdio {
    FILE *stdio_file;
    QEMUFile *file;
} QEMUFileStdio;

typedef struct QEMUFileSocket {
    int fd;
    QEMUFile *file;
} QEMUFileSocket;

/*
 * writev_buffer backend for sockets.
 *
 * Returns the number of bytes sent, or -socket_error() when fewer bytes
 * than the full vector were sent.
 */
static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
                                    int64_t pos)
{
    QEMUFileSocket *s = opaque;
    ssize_t len;
    ssize_t size = iov_size(iov, iovcnt);

    len = iov_send(s->fd, iov, iovcnt, 0, size);
    if (len < size) {
        len = -socket_error();
    }
    return len;
}

/* get_fd backend shared by the socket and unix-fd QEMUFile flavours. */
static int socket_get_fd(void *opaque)
{
    QEMUFileSocket *s = opaque;

    return s->fd;
}

/*
 * get_buffer backend for sockets.
 *
 * Retries on EINTR and yields until the fd is readable on EAGAIN.
 * Returns the qemu_recv() result, or -socket_error() on failure.
 */
static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
{
    QEMUFileSocket *s = opaque;
    ssize_t len;

    for (;;) {
        len = qemu_recv(s->fd, buf, size, 0);
        if (len != -1) {
            break;
        }
        if (socket_error() == EAGAIN) {
            yield_until_fd_readable(s->fd);
        } else if (socket_error() != EINTR) {
            break;
        }
    }

    if (len == -1) {
        len = -socket_error();
    }
    return len;
}

/* close backend for sockets: closes the fd and frees the backend state. */
static int socket_close(void *opaque)
{
    QEMUFileSocket *s = opaque;
    closesocket(s->fd);
    g_free(s);
    return 0;
}

/* get_fd backend for stdio-based QEMUFiles. */
static int stdio_get_fd(void *opaque)
{
    QEMUFileStdio *s = opaque;

    return fileno(s->stdio_file);
}

/*
 * put_buffer backend for stdio-based QEMUFiles.
 *
 * Returns size on success.  A short fwrite() is reported as a negative
 * errno value (-EIO when errno was not set) instead of being passed off
 * as a successful partial write.
 */
static int stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
{
    QEMUFileStdio *s = opaque;
    size_t written;

    written = fwrite(buf, 1, size, s->stdio_file);
    if (written != (size_t)size) {
        return errno ? -errno : -EIO;
    }
    return size;
}

/*
 * get_buffer backend for stdio-based QEMUFiles.
 *
 * Retries on EINTR and yields until the fd is readable on EAGAIN.
 * Returns the number of bytes fread() delivered.
 */
static int stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
{
    QEMUFileStdio *s = opaque;
    FILE *fp = s->stdio_file;
    int bytes;

    for (;;) {
        clearerr(fp);
        bytes = fread(buf, 1, size, fp);
        if (bytes != 0 || !ferror(fp)) {
            break;
        }
        if (errno == EAGAIN) {
            yield_until_fd_readable(fileno(fp));
        } else if (errno != EINTR) {
            break;
        }
    }
    return bytes;
}

/*
 * close backend for popen()ed streams.
 *
 * Returns -errno if pclose() fails, -EIO if the child did not exit
 * normally with status 0, otherwise the pclose() result.
 */
static int stdio_pclose(void *opaque)
{
    QEMUFileStdio *s = opaque;
    int ret;
    ret = pclose(s->stdio_file);
    if (ret == -1) {
        ret = -errno;
    } else if (!WIFEXITED(ret) || WEXITSTATUS(ret) != 0) {
        /* close succeeded, but non-zero exit code: */
        ret = -EIO; /* fake errno value */
    }
    g_free(s);
    return ret;
}

/*
 * close backend for fopen()ed streams.
 *
 * For writable regular files the data is flushed and synced to disk
 * before success is signalled.  The stream is always closed and the
 * backend state always freed, even when flushing or syncing fails; the
 * first error encountered is the one returned (as a negative errno).
 */
static int stdio_fclose(void *opaque)
{
    QEMUFileStdio *s = opaque;
    int ret = 0;

    if (s->file->ops->put_buffer || s->file->ops->writev_buffer) {
        int fd = fileno(s->stdio_file);
        struct stat st;

        if (fstat(fd, &st) != 0) {
            ret = -errno;
        } else if (S_ISREG(st.st_mode)) {
            /*
             * If the file handle is a regular file make sure the
             * data is flushed to disk before signaling success.
             * stdio_put_buffer() goes through fwrite(), so the stdio
             * buffer has to reach the descriptor before fsync().
             */
            if (fflush(s->stdio_file) != 0 || fsync(fd) != 0) {
                ret = -errno;
            }
        }
    }
    if (fclose(s->stdio_file) == EOF && ret == 0) {
        ret = -errno;
    }
    g_free(s);
    return ret;
}

static const QEMUFileOps stdio_pipe_read_ops = {
    .get_fd =     stdio_get_fd,
    .get_buffer = stdio_get_buffer,
    .close =      stdio_pclose
};

static const QEMUFileOps stdio_pipe_write_ops = {
    .get_fd =     stdio_get_fd,
    .put_buffer = stdio_put_buffer,
    .close =      stdio_pclose
};

/*
 * Open a QEMUFile on top of popen(command, mode).
 *
 * mode must be exactly "r" or "w".  Returns NULL on invalid mode or if
 * popen() fails.
 */
QEMUFile *qemu_popen_cmd(const char *command, const char *mode)
{
    FILE *stdio_file;
    QEMUFileStdio *s;

    if (mode == NULL || (mode[0] != 'r' && mode[0] != 'w') || mode[1] != 0) {
        fprintf(stderr, "qemu_popen: Argument validity check failed\n");
        return NULL;
    }

    stdio_file = popen(command, mode);
    if (stdio_file == NULL) {
        return NULL;
    }

    s = g_malloc0(sizeof(QEMUFileStdio));

    s->stdio_file = stdio_file;

    if (mode[0] == 'r') {
        s->file = qemu_fopen_ops(s, &stdio_pipe_read_ops);
    } else {
        s->file = qemu_fopen_ops(s, &stdio_pipe_write_ops);
    }
    return s->file;
}

static const QEMUFileOps stdio_file_read_ops = {
    .get_fd =     stdio_get_fd,
    .get_buffer = stdio_get_buffer,
    .close =      stdio_fclose
};

static const QEMUFileOps stdio_file_write_ops = {
    .get_fd =     stdio_get_fd,
    .put_buffer = stdio_put_buffer,
    .close =      stdio_fclose
};

/*
 * writev_buffer backend for plain file descriptors.
 *
 * Loops over writev() until the whole vector is written, restarting
 * after partial writes and EINTR.  The iovec passed in is temporarily
 * adjusted to skip already-written bytes and is restored before this
 * function returns, on the error path as well.
 *
 * Returns the total number of bytes written, or -errno on failure.
 */
static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
                                  int64_t pos)
{
    QEMUFileSocket *s = opaque;
    ssize_t len, offset;
    ssize_t size = iov_size(iov, iovcnt);
    ssize_t total = 0;

    assert(iovcnt > 0);
    offset = 0;
    while (size > 0) {
        /* Find the next start position; skip all full-sized vector elements */
        while ((size_t)offset >= iov[0].iov_len) {
            offset -= iov[0].iov_len;
            iov++, iovcnt--;
        }

        /* skip `offset' bytes from the (now) first element, undo it on exit */
        assert(iovcnt > 0);
        iov[0].iov_base += offset;
        iov[0].iov_len -= offset;

        do {
            len = writev(s->fd, iov, iovcnt);
        } while (len == -1 && errno == EINTR);

        /*
         * Undo the changes above.  This is done before looking at the
         * result so that the caller's vector is intact on failure too;
         * the pointer arithmetic does not touch errno.
         */
        iov[0].iov_base -= offset;
        iov[0].iov_len += offset;

        if (len == -1) {
            return -errno;
        }

        /* Prepare for the next iteration */
        offset += len;
        total += len;
        size -= len;
    }

    return total;
}

/*
 * get_buffer backend for plain file descriptors.
 *
 * Retries on EINTR and yields until the fd is readable on EAGAIN.
 * Returns the read() result, or -errno on failure.
 */
static int unix_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
{
    QEMUFileSocket *s = opaque;
    ssize_t len;

    for (;;) {
        len = read(s->fd, buf, size);
        if (len != -1) {
            break;
        }
        if (errno == EAGAIN) {
            yield_until_fd_readable(s->fd);
        } else if (errno != EINTR) {
            break;
        }
    }

    if (len == -1) {
        len = -errno;
    }
    return len;
}

/* close backend for plain fds: closes the fd and frees the backend state. */
static int unix_close(void *opaque)
{
    QEMUFileSocket *s = opaque;
    close(s->fd);
    g_free(s);
    return 0;
}

static const QEMUFileOps unix_read_ops = {
    .get_fd =     socket_get_fd,
    .get_buffer = unix_get_buffer,
    .close =      unix_close
};

static const QEMUFileOps unix_write_ops = {
    .get_fd =     socket_get_fd,
    .writev_buffer = unix_writev_buffer,
    .close =      unix_close
};

/*
 * Wrap an already open file descriptor in a QEMUFile.
 *
 * mode must be exactly "rb" or "wb"; returns NULL otherwise.
 */
QEMUFile *qemu_fdopen(int fd, const char *mode)
{
    QEMUFileSocket *s;

    if (mode == NULL ||
        (mode[0] != 'r' && mode[0] != 'w') ||
        mode[1] != 'b' || mode[2] != 0) {
        fprintf(stderr, "qemu_fdopen: Argument validity check failed\n");
        return NULL;
    }

    s = g_malloc0(sizeof(QEMUFileSocket));
    s->fd = fd;

    if (mode[0] == 'r') {
        s->file = qemu_fopen_ops(s, &unix_read_ops);
    } else {
        s->file = qemu_fopen_ops(s, &unix_write_ops);
    }
    return s->file;
}

static const QEMUFileOps socket_read_ops = {
    .get_fd =     socket_get_fd,
    .get_buffer = socket_get_buffer,
    .close =      socket_close
};

static const QEMUFileOps socket_write_ops = {
    .get_fd =     socket_get_fd,
    .writev_buffer = socket_writev_buffer,
    .close =      socket_close
};

/*
 * Validate a QEMUFile open mode.
 *
 * Returns true (and prints a diagnostic) unless mode is exactly "rb"
 * or "wb".
 */
bool qemu_file_mode_is_not_valid(const char *mode)
{
    if (mode == NULL ||
        (mode[0] != 'r' && mode[0] != 'w') ||
        mode[1] != 'b' || mode[2] != 0) {
        fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
        return true;
    }

    return false;
}

/*
 * Wrap a socket in a QEMUFile.  For "wb" the socket is switched to
 * blocking mode first.  Returns NULL on invalid mode.
 */
QEMUFile *qemu_fopen_socket(int fd, const char *mode)
{
    QEMUFileSocket *s;

    if (qemu_file_mode_is_not_valid(mode)) {
        return NULL;
    }

    s = g_malloc0(sizeof(QEMUFileSocket));
    s->fd = fd;
    if (mode[0] == 'w') {
        qemu_set_block(s->fd);
        s->file = qemu_fopen_ops(s, &socket_write_ops);
    } else {
        s->file = qemu_fopen_ops(s, &socket_read_ops);
    }
    return s->file;
}

/*
 * Open filename with fopen() and wrap it in a QEMUFile.
 *
 * mode must be "rb" or "wb".  Returns NULL on invalid mode or if the
 * file cannot be opened.
 */
QEMUFile *qemu_fopen(const char *filename, const char *mode)
{
    QEMUFileStdio *s;

    if (qemu_file_mode_is_not_valid(mode)) {
        return NULL;
    }

    s = g_malloc0(sizeof(QEMUFileStdio));

    s->stdio_file = fopen(filename, mode);
    if (!s->stdio_file) {
        goto fail;
    }

    if (mode[0] == 'w') {
        s->file = qemu_fopen_ops(s, &stdio_file_write_ops);
    } else {
        s->file = qemu_fopen_ops(s, &stdio_file_read_ops);
    }
    return s->file;
fail:
    g_free(s);
    return NULL;
}

/*
 * writev_buffer backend storing VM state in a block device.
 *
 * Returns the size of the vector on success, or the negative value
 * returned by bdrv_writev_vmstate().
 */
static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
                                   int64_t pos)
{
    int ret;
    QEMUIOVector qiov;

    qemu_iovec_init_external(&qiov, iov, iovcnt);
    ret = bdrv_writev_vmstate(opaque, &qiov, pos);
    if (ret < 0) {
        return ret;
    }

    return qiov.size;
}

/*
 * put_buffer backend storing VM state in a block device.
 *
 * Returns size on success; a negative result from bdrv_save_vmstate()
 * is propagated instead of being reported as a successful write.
 */
static int block_put_buffer(void *opaque, const uint8_t *buf,
                            int64_t pos, int size)
{
    int ret;

    ret = bdrv_save_vmstate(opaque, buf, pos, size);
    if (ret < 0) {
        return ret;
    }
    return size;
}

/* get_buffer backend loading VM state from a block device. */
static int block_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
{
    return bdrv_load_vmstate(opaque, buf, pos, size);
}

/* close backend for block devices: flushes the device. */
static int bdrv_fclose(void *opaque)
{
    return bdrv_flush(opaque);
}

static const QEMUFileOps bdrv_read_ops = {
    .get_buffer = block_get_buffer,
    .close =      bdrv_fclose
};

static const QEMUFileOps bdrv_write_ops = {
    .put_buffer     = block_put_buffer,
    .writev_buffer  = block_writev_buffer,
    .close          = bdrv_fclose
};

/* Create a QEMUFile reading from or writing to the VM state area of bs. */
static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
{
    if (is_writable) {
        return qemu_fopen_ops(bs, &bdrv_write_ops);
    }
    return qemu_fopen_ops(bs, &bdrv_read_ops);
}

/*
 * Allocate a zero-initialised QEMUFile bound to the given backend
 * state (opaque) and operations table (ops).
 */
QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
{
    QEMUFile *f;

    f = g_malloc0(sizeof(QEMUFile));

    f->opaque = opaque;
    f->ops = ops;
    return f;
}

/*
 * Get last error for stream f
 *
 * Return negative error value if there has been an error on previous
 * operations, return 0 if no error happened.
* */ int qemu_file_get_error(QEMUFile *f) { return f->last_error; } void qemu_file_set_error(QEMUFile *f, int ret) { if (f->last_error == 0) { f->last_error = ret; } } static inline bool qemu_file_is_writable(QEMUFile *f) { return f->ops->writev_buffer || f->ops->put_buffer; } /** * Flushes QEMUFile buffer * * If there is writev_buffer QEMUFileOps it uses it otherwise uses * put_buffer ops. */ void qemu_fflush(QEMUFile *f) { ssize_t ret = 0; if (!qemu_file_is_writable(f)) { return; } if (f->ops->writev_buffer) { if (f->iovcnt > 0) { ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos); } } else { if (f->buf_index > 0) { ret = f->ops->put_buffer(f->opaque, f->buf, f->pos, f->buf_index); } } if (ret >= 0) { f->pos += ret; } f->buf_index = 0; f->iovcnt = 0; if (ret < 0) { qemu_file_set_error(f, ret); } } void ram_control_before_iterate(QEMUFile *f, uint64_t flags) { int ret = 0; if (f->ops->before_ram_iterate) { ret = f->ops->before_ram_iterate(f, f->opaque, flags); if (ret < 0) { qemu_file_set_error(f, ret); } } } void ram_control_after_iterate(QEMUFile *f, uint64_t flags) { int ret = 0; if (f->ops->after_ram_iterate) { ret = f->ops->after_ram_iterate(f, f->opaque, flags); if (ret < 0) { qemu_file_set_error(f, ret); } } } void ram_control_load_hook(QEMUFile *f, uint64_t flags) { int ret = -EINVAL; if (f->ops->hook_ram_load) { ret = f->ops->hook_ram_load(f, f->opaque, flags); if (ret < 0) { qemu_file_set_error(f, ret); } } else { qemu_file_set_error(f, ret); } } size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, size_t size, int *bytes_sent) { if (f->ops->save_page) { int ret = f->ops->save_page(f, f->opaque, block_offset, offset, size, bytes_sent); if (ret != RAM_SAVE_CONTROL_DELAYED) { if (bytes_sent && *bytes_sent > 0) { qemu_update_position(f, *bytes_sent); } else if (ret < 0) { qemu_file_set_error(f, ret); } } return ret; } return RAM_SAVE_CONTROL_NOT_SUPP; } static void qemu_fill_buffer(QEMUFile *f) { int len; int 
pending; assert(!qemu_file_is_writable(f)); pending = f->buf_size - f->buf_index; if (pending > 0) { memmove(f->buf, f->buf + f->buf_index, pending); } f->buf_index = 0; f->buf_size = pending; len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, IO_BUF_SIZE - pending); if (len > 0) { f->buf_size += len; f->pos += len; } else if (len == 0) { qemu_file_set_error(f, -EIO); } else if (len != -EAGAIN) { qemu_file_set_error(f, len); } } int qemu_get_fd(QEMUFile *f) { if (f->ops->get_fd) { return f->ops->get_fd(f->opaque); } return -1; } void qemu_update_position(QEMUFile *f, size_t size) { f->pos += size; } /** Closes the file * * Returns negative error value if any error happened on previous operations or * while closing the file. Returns 0 or positive number on success. * * The meaning of return value on success depends on the specific backend * being used. */ int qemu_fclose(QEMUFile *f) { int ret; qemu_fflush(f); ret = qemu_file_get_error(f); if (f->ops->close) { int ret2 = f->ops->close(f->opaque); if (ret >= 0) { ret = ret2; } } /* If any error was spotted before closing, we should report it * instead of the close() return value. 
*/ if (f->last_error) { ret = f->last_error; } g_free(f); return ret; } static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size) { /* check for adjacent buffer and coalesce them */ if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base + f->iov[f->iovcnt - 1].iov_len) { f->iov[f->iovcnt - 1].iov_len += size; } else { f->iov[f->iovcnt].iov_base = (uint8_t *)buf; f->iov[f->iovcnt++].iov_len = size; } if (f->iovcnt >= MAX_IOV_SIZE) { qemu_fflush(f); } } void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size) { if (!f->ops->writev_buffer) { qemu_put_buffer(f, buf, size); return; } if (f->last_error) { return; } f->bytes_xfer += size; add_to_iovec(f, buf, size); } void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size) { int l; if (f->last_error) { return; } while (size > 0) { l = IO_BUF_SIZE - f->buf_index; if (l > size) { l = size; } memcpy(f->buf + f->buf_index, buf, l); f->bytes_xfer += l; if (f->ops->writev_buffer) { add_to_iovec(f, f->buf + f->buf_index, l); } f->buf_index += l; if (f->buf_index == IO_BUF_SIZE) { qemu_fflush(f); } if (qemu_file_get_error(f)) { break; } buf += l; size -= l; } } void qemu_put_byte(QEMUFile *f, int v) { if (f->last_error) { return; } f->buf[f->buf_index] = v; f->bytes_xfer++; if (f->ops->writev_buffer) { add_to_iovec(f, f->buf + f->buf_index, 1); } f->buf_index++; if (f->buf_index == IO_BUF_SIZE) { qemu_fflush(f); } } void qemu_file_skip(QEMUFile *f, int size) { if (f->buf_index + size <= f->buf_size) { f->buf_index += size; } } int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset) { int pending; int index; assert(!qemu_file_is_writable(f)); index = f->buf_index + offset; pending = f->buf_size - index; if (pending < size) { qemu_fill_buffer(f); index = f->buf_index + offset; pending = f->buf_size - index; } if (pending <= 0) { return 0; } if (size > pending) { size = pending; } memcpy(buf, f->buf + index, size); return size; } int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int 
size) { int pending = size; int done = 0; while (pending > 0) { int res; res = qemu_peek_buffer(f, buf, pending, 0); if (res == 0) { return done; } qemu_file_skip(f, res); buf += res; pending -= res; done += res; } return done; } int qemu_peek_byte(QEMUFile *f, int offset) { int index = f->buf_index + offset; assert(!qemu_file_is_writable(f)); if (index >= f->buf_size) { qemu_fill_buffer(f); index = f->buf_index + offset; if (index >= f->buf_size) { return 0; } } return f->buf[index]; } int qemu_get_byte(QEMUFile *f) { int result; result = qemu_peek_byte(f, 0); qemu_file_skip(f, 1); return result; } int64_t qemu_ftell(QEMUFile *f) { qemu_fflush(f); return f->pos; } int qemu_file_rate_limit(QEMUFile *f) { if (qemu_file_get_error(f)) { return 1; } if (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit) { return 1; } return 0; } int64_t qemu_file_get_rate_limit(QEMUFile *f) { return f->xfer_limit; } void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit) { f->xfer_limit = limit; } void qemu_file_reset_rate_limit(QEMUFile *f) { f->bytes_xfer = 0; } void qemu_put_be16(QEMUFile *f, unsigned int v) { qemu_put_byte(f, v >> 8); qemu_put_byte(f, v); } void qemu_put_be32(QEMUFile *f, unsigned int v) { qemu_put_byte(f, v >> 24); qemu_put_byte(f, v >> 16); qemu_put_byte(f, v >> 8); qemu_put_byte(f, v); } void qemu_put_be64(QEMUFile *f, uint64_t v) { qemu_put_be32(f, v >> 32); qemu_put_be32(f, v); } unsigned int qemu_get_be16(QEMUFile *f) { unsigned int v; v = qemu_get_byte(f) << 8; v |= qemu_get_byte(f); return v; } unsigned int qemu_get_be32(QEMUFile *f) { unsigned int v; v = qemu_get_byte(f) << 24; v |= qemu_get_byte(f) << 16; v |= qemu_get_byte(f) << 8; v |= qemu_get_byte(f); return v; } uint64_t qemu_get_be64(QEMUFile *f) { uint64_t v; v = (uint64_t)qemu_get_be32(f) << 32; v |= qemu_get_be32(f); return v; } /* timer */ void timer_put(QEMUFile *f, QEMUTimer *ts) { uint64_t expire_time; expire_time = timer_expire_time_ns(ts); qemu_put_be64(f, expire_time); } void 
timer_get(QEMUFile *f, QEMUTimer *ts) { uint64_t expire_time; expire_time = qemu_get_be64(f); if (expire_time != -1) { timer_mod_ns(ts, expire_time); } else { timer_del(ts); } } /* timers */ static int get_timer(QEMUFile *f, void *pv, size_t size) { QEMUTimer *v = pv; timer_get(f, v); return 0; } static void put_timer(QEMUFile *f, void *pv, size_t size) { QEMUTimer *v = pv; timer_put(f, v); } const VMStateInfo vmstate_info_timer = { .name = "timer", .get = get_timer, .put = put_timer, }; typedef struct CompatEntry { char idstr[256]; int instance_id; } CompatEntry; typedef struct SaveStateEntry { QTAILQ_ENTRY(SaveStateEntry) entry; char idstr[256]; int instance_id; int alias_id; int version_id; int section_id; SaveVMHandlers *ops; const VMStateDescription *vmsd; void *opaque; CompatEntry *compat; int no_migrate; int is_ram; } SaveStateEntry; static QTAILQ_HEAD(savevm_handlers, SaveStateEntry) savevm_handlers = QTAILQ_HEAD_INITIALIZER(savevm_handlers); static int global_section_id; static int calculate_new_instance_id(const char *idstr) { SaveStateEntry *se; int instance_id = 0; QTAILQ_FOREACH(se, &savevm_handlers, entry) { if (strcmp(idstr, se->idstr) == 0 && instance_id <= se->instance_id) { instance_id = se->instance_id + 1; } } return instance_id; } static int calculate_compat_instance_id(const char *idstr) { SaveStateEntry *se; int instance_id = 0; QTAILQ_FOREACH(se, &savevm_handlers, entry) { if (!se->compat) { continue; } if (strcmp(idstr, se->compat->idstr) == 0 && instance_id <= se->compat->instance_id) { instance_id = se->compat->instance_id + 1; } } return instance_id; } /* TODO: Individual devices generally have very little idea about the rest of the system, so instance_id should be removed/replaced. Meanwhile pass -1 as instance_id if you do not already have a clearly distinguishing id for all instances of your device class. 
*/ int register_savevm_live(DeviceState *dev, const char *idstr, int instance_id, int version_id, SaveVMHandlers *ops, void *opaque) { SaveStateEntry *se; se = g_malloc0(sizeof(SaveStateEntry)); se->version_id = version_id; se->section_id = global_section_id++; se->ops = ops; se->opaque = opaque; se->vmsd = NULL; se->no_migrate = 0; /* if this is a live_savem then set is_ram */ if (ops->save_live_setup != NULL) { se->is_ram = 1; } if (dev) { char *id = qdev_get_dev_path(dev); if (id) { pstrcpy(se->idstr, sizeof(se->idstr), id); pstrcat(se->idstr, sizeof(se->idstr), "/"); g_free(id); se->compat = g_malloc0(sizeof(CompatEntry)); pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr); se->compat->instance_id = instance_id == -1 ? calculate_compat_instance_id(idstr) : instance_id; instance_id = -1; } } pstrcat(se->idstr, sizeof(se->idstr), idstr); if (instance_id == -1) { se->instance_id = calculate_new_instance_id(se->idstr); } else { se->instance_id = instance_id; } assert(!se->compat || se->instance_id == 0); /* add at the end of list */ QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry); return 0; } int register_savevm(DeviceState *dev, const char *idstr, int instance_id, int version_id, SaveStateHandler *save_state, LoadStateHandler *load_state, void *opaque) { SaveVMHandlers *ops = g_malloc0(sizeof(SaveVMHandlers)); ops->save_state = save_state; ops->load_state = load_state; return register_savevm_live(dev, idstr, instance_id, version_id, ops, opaque); } void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque) { SaveStateEntry *se, *new_se; char id[256] = ""; if (dev) { char *path = qdev_get_dev_path(dev); if (path) { pstrcpy(id, sizeof(id), path); pstrcat(id, sizeof(id), "/"); g_free(path); } } pstrcat(id, sizeof(id), idstr); QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) { if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) { QTAILQ_REMOVE(&savevm_handlers, se, entry); if (se->compat) { g_free(se->compat); } g_free(se->ops); 
g_free(se); } } } int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, const VMStateDescription *vmsd, void *opaque, int alias_id, int required_for_version) { SaveStateEntry *se; /* If this triggers, alias support can be dropped for the vmsd. */ assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id); se = g_malloc0(sizeof(SaveStateEntry)); se->version_id = vmsd->version_id; se->section_id = global_section_id++; se->opaque = opaque; se->vmsd = vmsd; se->alias_id = alias_id; se->no_migrate = vmsd->unmigratable; if (dev) { char *id = qdev_get_dev_path(dev); if (id) { pstrcpy(se->idstr, sizeof(se->idstr), id); pstrcat(se->idstr, sizeof(se->idstr), "/"); g_free(id); se->compat = g_malloc0(sizeof(CompatEntry)); pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); se->compat->instance_id = instance_id == -1 ? calculate_compat_instance_id(vmsd->name) : instance_id; instance_id = -1; } } pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); if (instance_id == -1) { se->instance_id = calculate_new_instance_id(se->idstr); } else { se->instance_id = instance_id; } assert(!se->compat || se->instance_id == 0); /* add at the end of list */ QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry); return 0; } void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd, void *opaque) { SaveStateEntry *se, *new_se; QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) { if (se->vmsd == vmsd && se->opaque == opaque) { QTAILQ_REMOVE(&savevm_handlers, se, entry); if (se->compat) { g_free(se->compat); } g_free(se); } } } static int vmstate_load(QEMUFile *f, SaveStateEntry *se, int version_id) { if (!se->vmsd) { /* Old style */ return se->ops->load_state(f, se->opaque, version_id); } return vmstate_load_state(f, se->vmsd, se->opaque, version_id); } static void vmstate_save(QEMUFile *f, SaveStateEntry *se) { if (!se->vmsd) { /* Old style */ se->ops->save_state(f, se->opaque); return; } vmstate_save_state(f, se->vmsd, se->opaque); } 
bool qemu_savevm_state_blocked(Error **errp) { SaveStateEntry *se; QTAILQ_FOREACH(se, &savevm_handlers, entry) { if (se->no_migrate) { error_set(errp, QERR_MIGRATION_NOT_SUPPORTED, se->idstr); return true; } } return false; } void qemu_savevm_state_begin(QEMUFile *f, const MigrationParams *params) { SaveStateEntry *se; int ret; QTAILQ_FOREACH(se, &savevm_handlers, entry) { if (!se->ops || !se->ops->set_params) { continue; } se->ops->set_params(params, se->opaque); } qemu_put_be32(f, QEMU_VM_FILE_MAGIC); qemu_put_be32(f, QEMU_VM_FILE_VERSION); QTAILQ_FOREACH(se, &savevm_handlers, entry) { int len; if (!se->ops || !se->ops->save_live_setup) { continue; } if (se->ops && se->ops->is_active) { if (!se->ops->is_active(se->opaque)) { continue; } } /* Section type */ qemu_put_byte(f, QEMU_VM_SECTION_START); qemu_put_be32(f, se->section_id); /* ID string */ len = strlen(se->idstr); qemu_put_byte(f, len); qemu_put_buffer(f, (uint8_t *)se->idstr, len); qemu_put_be32(f, se->instance_id); qemu_put_be32(f, se->version_id); ret = se->ops->save_live_setup(f, se->opaque); if (ret < 0) { qemu_file_set_error(f, ret); break; } } } /* * this function has three return values: * negative: there was one error, and we have -errno. 
* 0 : We haven't finished, caller have to go again * 1 : We have finished, we can go to complete phase */ int qemu_savevm_state_iterate(QEMUFile *f) { SaveStateEntry *se; int ret = 1; QTAILQ_FOREACH(se, &savevm_handlers, entry) { if (!se->ops || !se->ops->save_live_iterate) { continue; } if (se->ops && se->ops->is_active) { if (!se->ops->is_active(se->opaque)) { continue; } } if (qemu_file_rate_limit(f)) { return 0; } trace_savevm_section_start(); /* Section type */ qemu_put_byte(f, QEMU_VM_SECTION_PART); qemu_put_be32(f, se->section_id); ret = se->ops->save_live_iterate(f, se->opaque); trace_savevm_section_end(se->section_id); if (ret < 0) { qemu_file_set_error(f, ret); } if (ret <= 0) { /* Do not proceed to the next vmstate before this one reported completion of the current stage. This serializes the migration and reduces the probability that a faster changing state is synchronized over and over again. */ break; } } return ret; } void qemu_savevm_state_complete(QEMUFile *f) { SaveStateEntry *se; int ret; cpu_synchronize_all_states(); QTAILQ_FOREACH(se, &savevm_handlers, entry) { if (!se->ops || !se->ops->save_live_complete) { continue; } if (se->ops && se->ops->is_active) { if (!se->ops->is_active(se->opaque)) { continue; } } trace_savevm_section_start(); /* Section type */ qemu_put_byte(f, QEMU_VM_SECTION_END); qemu_put_be32(f, se->section_id); ret = se->ops->save_live_complete(f, se->opaque); trace_savevm_section_end(se->section_id); if (ret < 0) { qemu_file_set_error(f, ret); return; } } QTAILQ_FOREACH(se, &savevm_handlers, entry) { int len; if ((!se->ops || !se->ops->save_state) && !se->vmsd) { continue; } trace_savevm_section_start(); /* Section type */ qemu_put_byte(f, QEMU_VM_SECTION_FULL); qemu_put_be32(f, se->section_id); /* ID string */ len = strlen(se->idstr); qemu_put_byte(f, len); qemu_put_buffer(f, (uint8_t *)se->idstr, len); qemu_put_be32(f, se->instance_id); qemu_put_be32(f, se->version_id); vmstate_save(f, se); 
trace_savevm_section_end(se->section_id); } qemu_put_byte(f, QEMU_VM_EOF); qemu_fflush(f); } uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size) { SaveStateEntry *se; uint64_t ret = 0; QTAILQ_FOREACH(se, &savevm_handlers, entry) { if (!se->ops || !se->ops->save_live_pending) { continue; } if (se->ops && se->ops->is_active) { if (!se->ops->is_active(se->opaque)) { continue; } } ret += se->ops->save_live_pending(f, se->opaque, max_size); } return ret; } void qemu_savevm_state_cancel(void) { SaveStateEntry *se; QTAILQ_FOREACH(se, &savevm_handlers, entry) { if (se->ops && se->ops->cancel) { se->ops->cancel(se->opaque); } } } static int qemu_savevm_state(QEMUFile *f) { int ret; MigrationParams params = { .blk = 0, .shared = 0 }; if (qemu_savevm_state_blocked(NULL)) { return -EINVAL; } qemu_mutex_unlock_iothread(); qemu_savevm_state_begin(f, ¶ms); qemu_mutex_lock_iothread(); while (qemu_file_get_error(f) == 0) { if (qemu_savevm_state_iterate(f) > 0) { break; } } ret = qemu_file_get_error(f); if (ret == 0) { qemu_savevm_state_complete(f); ret = qemu_file_get_error(f); } if (ret != 0) { qemu_savevm_state_cancel(); } return ret; } static int qemu_save_device_state(QEMUFile *f) { SaveStateEntry *se; qemu_put_be32(f, QEMU_VM_FILE_MAGIC); qemu_put_be32(f, QEMU_VM_FILE_VERSION); cpu_synchronize_all_states(); QTAILQ_FOREACH(se, &savevm_handlers, entry) { int len; if (se->is_ram) { continue; } if ((!se->ops || !se->ops->save_state) && !se->vmsd) { continue; } /* Section type */ qemu_put_byte(f, QEMU_VM_SECTION_FULL); qemu_put_be32(f, se->section_id); /* ID string */ len = strlen(se->idstr); qemu_put_byte(f, len); qemu_put_buffer(f, (uint8_t *)se->idstr, len); qemu_put_be32(f, se->instance_id); qemu_put_be32(f, se->version_id); vmstate_save(f, se); } qemu_put_byte(f, QEMU_VM_EOF); return qemu_file_get_error(f); } static SaveStateEntry *find_se(const char *idstr, int instance_id) { SaveStateEntry *se; QTAILQ_FOREACH(se, &savevm_handlers, entry) { if 
(!strcmp(se->idstr, idstr) && (instance_id == se->instance_id || instance_id == se->alias_id)) return se; /* Migrating from an older version? */ if (strstr(se->idstr, idstr) && se->compat) { if (!strcmp(se->compat->idstr, idstr) && (instance_id == se->compat->instance_id || instance_id == se->alias_id)) return se; } } return NULL; } typedef struct LoadStateEntry { QLIST_ENTRY(LoadStateEntry) entry; SaveStateEntry *se; int section_id; int version_id; } LoadStateEntry; int qemu_loadvm_state(QEMUFile *f) { QLIST_HEAD(, LoadStateEntry) loadvm_handlers = QLIST_HEAD_INITIALIZER(loadvm_handlers); LoadStateEntry *le, *new_le; uint8_t section_type; unsigned int v; int ret; if (qemu_savevm_state_blocked(NULL)) { return -EINVAL; } v = qemu_get_be32(f); if (v != QEMU_VM_FILE_MAGIC) { return -EINVAL; } v = qemu_get_be32(f); if (v == QEMU_VM_FILE_VERSION_COMPAT) { fprintf(stderr, "SaveVM v2 format is obsolete and don't work anymore\n"); return -ENOTSUP; } if (v != QEMU_VM_FILE_VERSION) { return -ENOTSUP; } while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) { uint32_t instance_id, version_id, section_id; SaveStateEntry *se; char idstr[257]; int len; switch (section_type) { case QEMU_VM_SECTION_START: case QEMU_VM_SECTION_FULL: /* Read section start */ section_id = qemu_get_be32(f); len = qemu_get_byte(f); qemu_get_buffer(f, (uint8_t *)idstr, len); idstr[len] = 0; instance_id = qemu_get_be32(f); version_id = qemu_get_be32(f); /* Find savevm section */ se = find_se(idstr, instance_id); if (se == NULL) { fprintf(stderr, "Unknown savevm section or instance '%s' %d\n", idstr, instance_id); ret = -EINVAL; goto out; } /* Validate version */ if (version_id > se->version_id) { fprintf(stderr, "savevm: unsupported version %d for '%s' v%d\n", version_id, idstr, se->version_id); ret = -EINVAL; goto out; } /* Add entry */ le = g_malloc0(sizeof(*le)); le->se = se; le->section_id = section_id; le->version_id = version_id; QLIST_INSERT_HEAD(&loadvm_handlers, le, entry); ret = 
vmstate_load(f, le->se, le->version_id); if (ret < 0) { fprintf(stderr, "qemu: warning: error while loading state for instance 0x%x of device '%s'\n", instance_id, idstr); goto out; } break; case QEMU_VM_SECTION_PART: case QEMU_VM_SECTION_END: section_id = qemu_get_be32(f); QLIST_FOREACH(le, &loadvm_handlers, entry) { if (le->section_id == section_id) { break; } } if (le == NULL) { fprintf(stderr, "Unknown savevm section %d\n", section_id); ret = -EINVAL; goto out; } ret = vmstate_load(f, le->se, le->version_id); if (ret < 0) { fprintf(stderr, "qemu: warning: error while loading state section id %d\n", section_id); goto out; } break; default: fprintf(stderr, "Unknown savevm section type %d\n", section_type); ret = -EINVAL; goto out; } } cpu_synchronize_all_post_init(); ret = 0; out: QLIST_FOREACH_SAFE(le, &loadvm_handlers, entry, new_le) { QLIST_REMOVE(le, entry); g_free(le); } if (ret == 0) { ret = qemu_file_get_error(f); } return ret; } static BlockDriverState *find_vmstate_bs(void) { BlockDriverState *bs = NULL; while ((bs = bdrv_next(bs))) { if (bdrv_can_snapshot(bs)) { return bs; } } return NULL; } /* * Deletes snapshots of a given name in all opened images. 
*/ static int del_existing_snapshots(Monitor *mon, const char *name) { BlockDriverState *bs; QEMUSnapshotInfo sn1, *snapshot = &sn1; Error *err = NULL; bs = NULL; while ((bs = bdrv_next(bs))) { if (bdrv_can_snapshot(bs) && bdrv_snapshot_find(bs, snapshot, name) >= 0) { bdrv_snapshot_delete_by_id_or_name(bs, name, &err); if (error_is_set(&err)) { monitor_printf(mon, "Error while deleting snapshot on device '%s':" " %s\n", bdrv_get_device_name(bs), error_get_pretty(err)); error_free(err); return -1; } } } return 0; } void do_savevm(Monitor *mon, const QDict *qdict) { BlockDriverState *bs, *bs1; QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1; int ret; QEMUFile *f; int saved_vm_running; uint64_t vm_state_size; qemu_timeval tv; struct tm tm; const char *name = qdict_get_try_str(qdict, "name"); /* Verify if there is a device that doesn't support snapshots and is writable */ bs = NULL; while ((bs = bdrv_next(bs))) { if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { continue; } if (!bdrv_can_snapshot(bs)) { monitor_printf(mon, "Device '%s' is writable but does not support snapshots.\n", bdrv_get_device_name(bs)); return; } } bs = find_vmstate_bs(); if (!bs) { monitor_printf(mon, "No block device can accept snapshots\n"); return; } saved_vm_running = runstate_is_running(); vm_stop(RUN_STATE_SAVE_VM); memset(sn, 0, sizeof(*sn)); /* fill auxiliary fields */ qemu_gettimeofday(&tv); sn->date_sec = tv.tv_sec; sn->date_nsec = tv.tv_usec * 1000; sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); if (name) { ret = bdrv_snapshot_find(bs, old_sn, name); if (ret >= 0) { pstrcpy(sn->name, sizeof(sn->name), old_sn->name); pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str); } else { pstrcpy(sn->name, sizeof(sn->name), name); } } else { /* cast below needed for OpenBSD where tv_sec is still 'long' */ localtime_r((const time_t *)&tv.tv_sec, &tm); strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm); } /* Delete old snapshots of the same name */ if 
(name && del_existing_snapshots(mon, name) < 0) { goto the_end; } /* save the VM state */ f = qemu_fopen_bdrv(bs, 1); if (!f) { monitor_printf(mon, "Could not open VM state file\n"); goto the_end; } ret = qemu_savevm_state(f); vm_state_size = qemu_ftell(f); qemu_fclose(f); if (ret < 0) { monitor_printf(mon, "Error %d while writing VM\n", ret); goto the_end; } /* create the snapshots */ bs1 = NULL; while ((bs1 = bdrv_next(bs1))) { if (bdrv_can_snapshot(bs1)) { /* Write VM state size only to the image that contains the state */ sn->vm_state_size = (bs == bs1 ? vm_state_size : 0); ret = bdrv_snapshot_create(bs1, sn); if (ret < 0) { monitor_printf(mon, "Error while creating snapshot on '%s'\n", bdrv_get_device_name(bs1)); } } } the_end: if (saved_vm_running) { vm_start(); } } void qmp_xen_save_devices_state(const char *filename, Error **errp) { QEMUFile *f; int saved_vm_running; int ret; saved_vm_running = runstate_is_running(); vm_stop(RUN_STATE_SAVE_VM); f = qemu_fopen(filename, "wb"); if (!f) { error_setg_file_open(errp, errno, filename); goto the_end; } ret = qemu_save_device_state(f); qemu_fclose(f); if (ret < 0) { error_set(errp, QERR_IO_ERROR); } the_end: if (saved_vm_running) { vm_start(); } } int load_vmstate(const char *name) { BlockDriverState *bs, *bs_vm_state; QEMUSnapshotInfo sn; QEMUFile *f; int ret; bs_vm_state = find_vmstate_bs(); if (!bs_vm_state) { error_report("No block device supports snapshots"); return -ENOTSUP; } /* Don't even try to load empty VM states */ ret = bdrv_snapshot_find(bs_vm_state, &sn, name); if (ret < 0) { return ret; } else if (sn.vm_state_size == 0) { error_report("This is a disk-only snapshot. Revert to it offline " "using qemu-img."); return -EINVAL; } /* Verify if there is any device that doesn't support snapshots and is writable and check if the requested snapshot is available too. 
*/ bs = NULL; while ((bs = bdrv_next(bs))) { if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { continue; } if (!bdrv_can_snapshot(bs)) { error_report("Device '%s' is writable but does not support snapshots.", bdrv_get_device_name(bs)); return -ENOTSUP; } ret = bdrv_snapshot_find(bs, &sn, name); if (ret < 0) { error_report("Device '%s' does not have the requested snapshot '%s'", bdrv_get_device_name(bs), name); return ret; } } /* Flush all IO requests so they don't interfere with the new state. */ bdrv_drain_all(); bs = NULL; while ((bs = bdrv_next(bs))) { if (bdrv_can_snapshot(bs)) { ret = bdrv_snapshot_goto(bs, name); if (ret < 0) { error_report("Error %d while activating snapshot '%s' on '%s'", ret, name, bdrv_get_device_name(bs)); return ret; } } } /* restore the VM state */ f = qemu_fopen_bdrv(bs_vm_state, 0); if (!f) { error_report("Could not open VM state file"); return -EINVAL; } qemu_system_reset(VMRESET_SILENT); ret = qemu_loadvm_state(f); qemu_fclose(f); if (ret < 0) { error_report("Error %d while loading VM state", ret); return ret; } return 0; } void do_delvm(Monitor *mon, const QDict *qdict) { BlockDriverState *bs, *bs1; Error *err = NULL; const char *name = qdict_get_str(qdict, "name"); bs = find_vmstate_bs(); if (!bs) { monitor_printf(mon, "No block device supports snapshots\n"); return; } bs1 = NULL; while ((bs1 = bdrv_next(bs1))) { if (bdrv_can_snapshot(bs1)) { bdrv_snapshot_delete_by_id_or_name(bs, name, &err); if (error_is_set(&err)) { monitor_printf(mon, "Error while deleting snapshot on device '%s':" " %s\n", bdrv_get_device_name(bs), error_get_pretty(err)); error_free(err); } } } } void do_info_snapshots(Monitor *mon, const QDict *qdict) { BlockDriverState *bs, *bs1; QEMUSnapshotInfo *sn_tab, *sn, s, *sn_info = &s; int nb_sns, i, ret, available; int total; int *available_snapshots; bs = find_vmstate_bs(); if (!bs) { monitor_printf(mon, "No available block device supports snapshots\n"); return; } nb_sns = bdrv_snapshot_list(bs, &sn_tab); if 
(nb_sns < 0) { monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns); return; } if (nb_sns == 0) { monitor_printf(mon, "There is no snapshot available.\n"); return; } available_snapshots = g_malloc0(sizeof(int) * nb_sns); total = 0; for (i = 0; i < nb_sns; i++) { sn = &sn_tab[i]; available = 1; bs1 = NULL; while ((bs1 = bdrv_next(bs1))) { if (bdrv_can_snapshot(bs1) && bs1 != bs) { ret = bdrv_snapshot_find(bs1, sn_info, sn->id_str); if (ret < 0) { available = 0; break; } } } if (available) { available_snapshots[total] = i; total++; } } if (total > 0) { bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, NULL); monitor_printf(mon, "\n"); for (i = 0; i < total; i++) { sn = &sn_tab[available_snapshots[i]]; bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, sn); monitor_printf(mon, "\n"); } } else { monitor_printf(mon, "There is no suitable snapshot available\n"); } g_free(sn_tab); g_free(available_snapshots); } void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev) { qemu_ram_set_idstr(memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK, memory_region_name(mr), dev); } void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev) { /* Nothing do to while the implementation is in RAMBlock */ } void vmstate_register_ram_global(MemoryRegion *mr) { vmstate_register_ram(mr, NULL); }