diff options
63 files changed, 1670 insertions, 364 deletions
diff --git a/Makefile.target b/Makefile.target index 8a9c427b55..c8281e90a9 100644 --- a/Makefile.target +++ b/Makefile.target @@ -190,6 +190,9 @@ obj-$(CONFIG_USB_OHCI) += usb-ohci.o obj-y += rtl8139.o obj-y += e1000.o +# Inter-VM PCI shared memory +obj-$(CONFIG_KVM) += ivshmem.o + # Hardware support obj-i386-y += vga.o obj-i386-y += mc146818rtc.o i8259.o pc.o diff --git a/QMP/README b/QMP/README index 35a80c73e4..948d4453d5 100644 --- a/QMP/README +++ b/QMP/README @@ -7,60 +7,85 @@ Introduction The QEMU Monitor Protocol (QMP) allows applications to communicate with QEMU's Monitor. -QMP is JSON[1] based and has the following features: +QMP is JSON[1] based and currently has the following features: - Lightweight, text-based, easy to parse data format -- Asynchronous events support -- Stability +- Asynchronous messages support (ie. events) +- Capabilities Negotiation -For more information, please, refer to the following files: +For detailed information on QMP's usage, please, refer to the following files: o qmp-spec.txt QEMU Monitor Protocol current specification -o qmp-commands.txt QMP supported commands +o qmp-commands.txt QMP supported commands (auto-generated at build-time) o qmp-events.txt List of available asynchronous events There are also two simple Python scripts available: -o qmp-shell A shell -o vm-info Show some information about the Virtual Machine +o qmp-shell A shell +o vm-info Show some information about the Virtual Machine + +IMPORTANT: It's strongly recommended to read the 'Stability Considerations' +section in the qmp-commands.txt file before making any serious use of QMP. + [1] http://www.json.org Usage ----- -To enable QMP, QEMU has to be started in "control mode". There are -two ways of doing this, the simplest one is using the the '-qmp' -command-line option. +To enable QMP, you need a QEMU monitor instance in "control mode". There are +two ways of doing this. + +The simplest one is using the '-qmp' command-line option. 
The following +example makes QMP available on localhost port 4444: -For example: + $ qemu [...] -qmp tcp:localhost:4444,server -$ qemu [...] -qmp tcp:localhost:4444,server +However, in order to have more complex combinations, like multiple monitors, +the '-mon' command-line option should be used along with the '-chardev' one. +For instance, the following example creates one user monitor on stdio and one +QMP monitor on localhost port 4444. -Will start QEMU in control mode, waiting for a client TCP connection -on localhost port 4444. + $ qemu [...] -chardev stdio,id=mon0 -mon chardev=mon0,mode=readline \ + -chardev socket,id=mon1,host=localhost,port=4444,server \ + -mon chardev=mon1,mode=control -It is also possible to use the '-mon' command-line option to have -more complex combinations. Please, refer to the QEMU's manpage for -more information. +Please, refer to QEMU's manpage for more information. Simple Testing -------------- -To manually test QMP one can connect with telnet and issue commands: +To manually test QMP one can connect with telnet and issue commands by hand: $ telnet localhost 4444 Trying 127.0.0.1... Connected to localhost. Escape character is '^]'. -{"QMP": {"version": {"qemu": "0.12.50", "package": ""}, "capabilities": []}} +{"QMP": {"version": {"qemu": {"micro": 50, "minor": 13, "major": 0}, "package": ""}, "capabilities": []}} { "execute": "qmp_capabilities" } {"return": {}} { "execute": "query-version" } -{"return": {"qemu": "0.12.50", "package": ""}} +{"return": {"qemu": {"micro": 50, "minor": 13, "major": 0}, "package": ""}} + +Development Process +------------------- + +When changing QMP's interface (by adding new commands, events or modifying +existing ones) it's mandatory to update the relevant documentation, which is +one (or more) of the files listed in the 'Introduction' section*. + +Also, it's strongly recommended to send the documentation patch first, before +doing any code change. This is so because: + + 1. 
Avoids the code dictating the interface + + 2. Review can improve your interface. Letting that happen before + you implement it can save you work. + +* The qmp-commands.txt file is generated from the qemu-monitor.hx one, which + is the file that should be edited. -Contact -------- +Homepage +-------- http://www.linux-kvm.org/page/MonitorProtocol -Luiz Fernando N. Capitulino <lcapitulino@redhat.com> diff --git a/arch_init.c b/arch_init.c index 47bb4b2d8f..e468c0c7e2 100644 --- a/arch_init.c +++ b/arch_init.c @@ -82,12 +82,12 @@ const uint32_t arch_type = QEMU_ARCH; /***********************************************************/ /* ram save/restore */ -#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ -#define RAM_SAVE_FLAG_COMPRESS 0x02 -#define RAM_SAVE_FLAG_MEM_SIZE 0x04 -#define RAM_SAVE_FLAG_PAGE 0x08 -#define RAM_SAVE_FLAG_EOS 0x10 -#define RAM_SAVE_FLAG_CONTINUE 0x20 +#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ +#define RAM_SAVE_FLAG_COMPRESS 0x02 +#define RAM_SAVE_FLAG_MEM_SIZE 0x04 +#define RAM_SAVE_FLAG_PAGE 0x08 +#define RAM_SAVE_FLAG_EOS 0x10 +#define RAM_SAVE_FLAG_CONTINUE 0x20 static int is_dup_page(uint8_t *page, uint8_t ch) { @@ -104,10 +104,11 @@ static int is_dup_page(uint8_t *page, uint8_t ch) return 1; } +static RAMBlock *last_block; +static ram_addr_t last_offset; + static int ram_save_block(QEMUFile *f) { - static RAMBlock *last_block = NULL; - static ram_addr_t last_offset = 0; RAMBlock *block = last_block; ram_addr_t offset = last_offset; ram_addr_t current_addr; @@ -231,6 +232,8 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) if (stage == 1) { RAMBlock *block; bytes_transferred = 0; + last_block = NULL; + last_offset = 0; /* Make sure all dirty bits are set */ QLIST_FOREACH(block, &ram_list.blocks, next) { diff --git a/audio/audio_pt_int.c b/audio/audio_pt_int.c index e889a983b1..f15cc7015b 100644 --- a/audio/audio_pt_int.c +++ b/audio/audio_pt_int.c @@ -6,6 +6,8 @@ #include 
"audio_int.h" #include "audio_pt_int.h" +#include <signal.h> + static void logerr (struct audio_pt *pt, int err, const char *fmt, ...) { va_list ap; @@ -23,9 +25,16 @@ int audio_pt_init (struct audio_pt *p, void *(*func) (void *), { int err, err2; const char *efunc; + sigset_t set, old_set; p->drv = drv; + err = sigfillset (&set); + if (err) { + logerr (p, errno, "%s(%s): sigfillset failed", cap, AUDIO_FUNC); + return -1; + } + err = pthread_mutex_init (&p->mutex, NULL); if (err) { efunc = "pthread_mutex_init"; @@ -38,7 +47,23 @@ int audio_pt_init (struct audio_pt *p, void *(*func) (void *), goto err1; } + err = pthread_sigmask (SIG_BLOCK, &set, &old_set); + if (err) { + efunc = "pthread_sigmask"; + goto err2; + } + err = pthread_create (&p->thread, NULL, func, opaque); + + err2 = pthread_sigmask (SIG_SETMASK, &old_set, NULL); + if (err2) { + logerr (p, err2, "%s(%s): pthread_sigmask (restore) failed", + cap, AUDIO_FUNC); + /* We have failed to restore original signal mask, all bets are off, + so terminate the process */ + exit (EXIT_FAILURE); + } + if (err) { efunc = "pthread_create"; goto err2; diff --git a/audio/esdaudio.c b/audio/esdaudio.c index 79142d1706..9a1f2f8617 100644 --- a/audio/esdaudio.c +++ b/audio/esdaudio.c @@ -24,7 +24,6 @@ #include <esd.h> #include "qemu-common.h" #include "audio.h" -#include <signal.h> #define AUDIO_CAP "esd" #include "audio_int.h" @@ -190,10 +189,6 @@ static int qesd_init_out (HWVoiceOut *hw, struct audsettings *as) ESDVoiceOut *esd = (ESDVoiceOut *) hw; struct audsettings obt_as = *as; int esdfmt = ESD_STREAM | ESD_PLAY; - int err; - sigset_t set, old_set; - - sigfillset (&set); esdfmt |= (as->nchannels == 2) ? 
ESD_STEREO : ESD_MONO; switch (as->fmt) { @@ -231,43 +226,25 @@ static int qesd_init_out (HWVoiceOut *hw, struct audsettings *as) return -1; } - esd->fd = -1; - err = pthread_sigmask (SIG_BLOCK, &set, &old_set); - if (err) { - qesd_logerr (err, "pthread_sigmask failed\n"); - goto fail1; - } - esd->fd = esd_play_stream (esdfmt, as->freq, conf.dac_host, NULL); if (esd->fd < 0) { qesd_logerr (errno, "esd_play_stream failed\n"); - goto fail2; + goto fail1; } if (audio_pt_init (&esd->pt, qesd_thread_out, esd, AUDIO_CAP, AUDIO_FUNC)) { - goto fail3; - } - - err = pthread_sigmask (SIG_SETMASK, &old_set, NULL); - if (err) { - qesd_logerr (err, "pthread_sigmask(restore) failed\n"); + goto fail2; } return 0; - fail3: + fail2: if (close (esd->fd)) { qesd_logerr (errno, "%s: close on esd socket(%d) failed\n", AUDIO_FUNC, esd->fd); } esd->fd = -1; - fail2: - err = pthread_sigmask (SIG_SETMASK, &old_set, NULL); - if (err) { - qesd_logerr (err, "pthread_sigmask(restore) failed\n"); - } - fail1: qemu_free (esd->pcm_buf); esd->pcm_buf = NULL; @@ -423,10 +400,6 @@ static int qesd_init_in (HWVoiceIn *hw, struct audsettings *as) ESDVoiceIn *esd = (ESDVoiceIn *) hw; struct audsettings obt_as = *as; int esdfmt = ESD_STREAM | ESD_RECORD; - int err; - sigset_t set, old_set; - - sigfillset (&set); esdfmt |= (as->nchannels == 2) ? 
ESD_STEREO : ESD_MONO; switch (as->fmt) { @@ -461,44 +434,25 @@ static int qesd_init_in (HWVoiceIn *hw, struct audsettings *as) return -1; } - esd->fd = -1; - - err = pthread_sigmask (SIG_BLOCK, &set, &old_set); - if (err) { - qesd_logerr (err, "pthread_sigmask failed\n"); - goto fail1; - } - esd->fd = esd_record_stream (esdfmt, as->freq, conf.adc_host, NULL); if (esd->fd < 0) { qesd_logerr (errno, "esd_record_stream failed\n"); - goto fail2; + goto fail1; } if (audio_pt_init (&esd->pt, qesd_thread_in, esd, AUDIO_CAP, AUDIO_FUNC)) { - goto fail3; - } - - err = pthread_sigmask (SIG_SETMASK, &old_set, NULL); - if (err) { - qesd_logerr (err, "pthread_sigmask(restore) failed\n"); + goto fail2; } return 0; - fail3: + fail2: if (close (esd->fd)) { qesd_logerr (errno, "%s: close on esd socket(%d) failed\n", AUDIO_FUNC, esd->fd); } esd->fd = -1; - fail2: - err = pthread_sigmask (SIG_SETMASK, &old_set, NULL); - if (err) { - qesd_logerr (err, "pthread_sigmask(restore) failed\n"); - } - fail1: qemu_free (esd->pcm_buf); esd->pcm_buf = NULL; diff --git a/audio/sdlaudio.c b/audio/sdlaudio.c index c353016c1f..b74dcfa734 100644 --- a/audio/sdlaudio.c +++ b/audio/sdlaudio.c @@ -184,11 +184,20 @@ static int sdl_open (SDL_AudioSpec *req, SDL_AudioSpec *obt) { int status; #ifndef _WIN32 + int err; sigset_t new, old; /* Make sure potential threads created by SDL don't hog signals. 
*/ - sigfillset (&new); - pthread_sigmask (SIG_BLOCK, &new, &old); + err = sigfillset (&new); + if (err) { + dolog ("sdl_open: sigfillset failed: %s\n", strerror (errno)); + return -1; + } + err = pthread_sigmask (SIG_BLOCK, &new, &old); + if (err) { + dolog ("sdl_open: pthread_sigmask failed: %s\n", strerror (err)); + return -1; + } #endif status = SDL_OpenAudio (req, obt); @@ -197,7 +206,14 @@ static int sdl_open (SDL_AudioSpec *req, SDL_AudioSpec *obt) } #ifndef _WIN32 - pthread_sigmask (SIG_SETMASK, &old, NULL); + err = pthread_sigmask (SIG_SETMASK, &old, NULL); + if (err) { + dolog ("sdl_open: pthread_sigmask (restore) failed: %s\n", + strerror (errno)); + /* We have failed to restore original signal mask, all bets are off, + so exit the process */ + exit (EXIT_FAILURE); + } #endif return status; } diff --git a/block-migration.c b/block-migration.c index 8eda307d7f..0bfdb73c8b 100644 --- a/block-migration.c +++ b/block-migration.c @@ -346,7 +346,7 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile *f, blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE; qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); - blk->time = qemu_get_clock_ns(rt_clock); + blk->time = qemu_get_clock_ns(rt_clock); blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); @@ -449,13 +449,13 @@ static int is_stage2_completed(void) if (block_mig_state.bulk_completed == 1) { remaining_dirty = get_remaining_dirty(); - if (remaining_dirty == 0) { - return 1; - } + if (remaining_dirty == 0) { + return 1; + } - bwidth = compute_read_bwidth(); + bwidth = compute_read_bwidth(); - if ((remaining_dirty / bwidth) <= + if ((remaining_dirty / bwidth) <= migrate_max_downtime()) { /* finish stage2 because we think that we can finish remaing work below max_downtime */ @@ -739,14 +739,16 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res) return bs->drv->bdrv_check(bs, res); } +#define COMMIT_BUF_SECTORS 2048 + /* commit COW file into the raw image */ int 
bdrv_commit(BlockDriverState *bs) { BlockDriver *drv = bs->drv; - int64_t i, total_sectors; - int n, j, ro, open_flags; + int64_t sector, total_sectors; + int n, ro, open_flags; int ret = 0, rw_ret = 0; - unsigned char sector[BDRV_SECTOR_SIZE]; + uint8_t *buf; char filename[1024]; BlockDriverState *bs_rw, *bs_ro; @@ -789,22 +791,20 @@ int bdrv_commit(BlockDriverState *bs) } total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; - for (i = 0; i < total_sectors;) { - if (drv->bdrv_is_allocated(bs, i, 65536, &n)) { - for(j = 0; j < n; j++) { - if (bdrv_read(bs, i, sector, 1) != 0) { - ret = -EIO; - goto ro_cleanup; - } + buf = qemu_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); - if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) { - ret = -EIO; - goto ro_cleanup; - } - i++; - } - } else { - i += n; + for (sector = 0; sector < total_sectors; sector += n) { + if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) { + + if (bdrv_read(bs, sector, buf, n) != 0) { + ret = -EIO; + goto ro_cleanup; + } + + if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) { + ret = -EIO; + goto ro_cleanup; + } } } @@ -821,6 +821,7 @@ int bdrv_commit(BlockDriverState *bs) bdrv_flush(bs->backing_hd); ro_cleanup: + qemu_free(buf); if (ro) { /* re-open as RO */ @@ -1476,10 +1477,8 @@ int bdrv_has_zero_init(BlockDriverState *bs) { assert(bs->drv); - if (bs->drv->no_zero_init) { - return 0; - } else if (bs->file) { - return bdrv_has_zero_init(bs->file); + if (bs->drv->bdrv_has_zero_init) { + return bs->drv->bdrv_has_zero_init(bs); } return 1; @@ -2518,7 +2517,7 @@ int bdrv_is_inserted(BlockDriverState *bs) if (!drv) return 0; if (!drv->bdrv_is_inserted) - return 1; + return !bs->tray_open; ret = drv->bdrv_is_inserted(bs); return ret; } @@ -2560,10 +2559,11 @@ int bdrv_eject(BlockDriverState *bs, int eject_flag) ret = drv->bdrv_eject(bs, eject_flag); } if (ret == -ENOTSUP) { - if (eject_flag) - bdrv_close(bs); ret = 0; } + if (ret >= 0) { + bs->tray_open = eject_flag; + } return ret; } 
diff --git a/block/raw-posix.c b/block/raw-posix.c index a11170ed16..72fb8cebd4 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -993,6 +993,11 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options) return ret; } +static int hdev_has_zero_init(BlockDriverState *bs) +{ + return 0; +} + static BlockDriver bdrv_host_device = { .format_name = "host_device", .protocol_name = "host_device", @@ -1002,7 +1007,7 @@ static BlockDriver bdrv_host_device = { .bdrv_close = raw_close, .bdrv_create = hdev_create, .create_options = raw_create_options, - .no_zero_init = 1, + .bdrv_has_zero_init = hdev_has_zero_init, .bdrv_flush = raw_flush, .bdrv_aio_readv = raw_aio_readv, @@ -1117,7 +1122,7 @@ static BlockDriver bdrv_host_floppy = { .bdrv_close = raw_close, .bdrv_create = hdev_create, .create_options = raw_create_options, - .no_zero_init = 1, + .bdrv_has_zero_init = hdev_has_zero_init, .bdrv_flush = raw_flush, .bdrv_aio_readv = raw_aio_readv, @@ -1217,7 +1222,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_close = raw_close, .bdrv_create = hdev_create, .create_options = raw_create_options, - .no_zero_init = 1, + .bdrv_has_zero_init = hdev_has_zero_init, .bdrv_flush = raw_flush, .bdrv_aio_readv = raw_aio_readv, @@ -1340,7 +1345,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_close = raw_close, .bdrv_create = hdev_create, .create_options = raw_create_options, - .no_zero_init = 1, + .bdrv_has_zero_init = hdev_has_zero_init, .bdrv_flush = raw_flush, .bdrv_aio_readv = raw_aio_readv, diff --git a/block/raw-win32.c b/block/raw-win32.c index 745bbde673..503ed3959a 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -394,6 +394,11 @@ static int raw_set_locked(BlockDriverState *bs, int locked) } #endif +static int hdev_has_zero_init(BlockDriverState *bs) +{ + return 0; +} + static BlockDriver bdrv_host_device = { .format_name = "host_device", .protocol_name = "host_device", @@ -402,6 +407,7 @@ static BlockDriver bdrv_host_device = { .bdrv_file_open = 
hdev_open, .bdrv_close = raw_close, .bdrv_flush = raw_flush, + .bdrv_has_zero_init = hdev_has_zero_init, .bdrv_read = raw_read, .bdrv_write = raw_write, diff --git a/block/raw.c b/block/raw.c index 1414e777b3..61e674856d 100644 --- a/block/raw.c +++ b/block/raw.c @@ -237,6 +237,11 @@ static QEMUOptionParameter raw_create_options[] = { { NULL } }; +static int raw_has_zero_init(BlockDriverState *bs) +{ + return bdrv_has_zero_init(bs->file); +} + static BlockDriver bdrv_raw = { .format_name = "raw", @@ -264,6 +269,7 @@ static BlockDriver bdrv_raw = { .bdrv_create = raw_create, .create_options = raw_create_options, + .bdrv_has_zero_init = raw_has_zero_init, }; static void bdrv_raw_init(void) diff --git a/block_int.h b/block_int.h index f075a8cba5..b863451774 100644 --- a/block_int.h +++ b/block_int.h @@ -127,8 +127,11 @@ struct BlockDriver { void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event); - /* Set if newly created images are not guaranteed to contain only zeros */ - int no_zero_init; + /* + * Returns 1 if newly created images are guaranteed to contain only + * zeros, 0 otherwise. + */ + int (*bdrv_has_zero_init)(BlockDriverState *bs); QLIST_ENTRY(BlockDriver) list; }; @@ -141,6 +144,7 @@ struct BlockDriverState { int open_flags; /* flags used to open the file, re-used for re-open */ int removable; /* if true, the media can be removed */ int locked; /* if true, the media cannot temporarily be ejected */ + int tray_open; /* if true, the virtual tray is open */ int encrypted; /* if true, the media is encrypted */ int valid_key; /* if true, a valid encryption key has been set */ int sg; /* if true, the device is a /dev/sg* */ diff --git a/blockdev.c b/blockdev.c index 01e402bf3d..ff7602be2c 100644 --- a/blockdev.c +++ b/blockdev.c @@ -50,7 +50,7 @@ QemuOpts *drive_add(const char *file, const char *fmt, ...) 
vsnprintf(optstr, sizeof(optstr), fmt, ap); va_end(ap); - opts = qemu_opts_parse(&qemu_drive_opts, optstr, 0); + opts = qemu_opts_parse(qemu_find_opts("drive"), optstr, 0); if (!opts) { return NULL; } @@ -451,7 +451,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi, int *fatal_error) break; case IF_VIRTIO: /* add virtio block device */ - opts = qemu_opts_create(&qemu_device_opts, NULL, 0); + opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0); qemu_opt_set(opts, "driver", "virtio-blk-pci"); qemu_opt_set(opts, "drive", dinfo->id); if (devaddr) diff --git a/buffered_file.c b/buffered_file.c index 54dc6c29ba..1836e7e242 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -156,6 +156,14 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in offset = size; } + if (pos == 0 && size == 0) { + DPRINTF("file is ready\n"); + if (s->bytes_xfer <= s->xfer_limit) { + DPRINTF("notifying client\n"); + s->put_ready(s->opaque); + } + } + return offset; } @@ -222,8 +230,10 @@ static void buffered_rate_tick(void *opaque) { QEMUFileBuffered *s = opaque; - if (s->has_error) + if (s->has_error) { + buffered_close(s); return; + } qemu_mod_timer(s->timer, qemu_get_clock(rt_clock) + 100); diff --git a/cpu-common.h b/cpu-common.h index 71e7933c51..0426bc8e27 100644 --- a/cpu-common.h +++ b/cpu-common.h @@ -40,6 +40,8 @@ static inline void cpu_register_physical_memory(target_phys_addr_t start_addr, } ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr); +ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, + ram_addr_t size, void *host); ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size); void qemu_ram_free(ram_addr_t addr); /* This should only be used for ram local to a device. 
*/ diff --git a/cpu-exec.c b/cpu-exec.c index d170566cfd..dbdfdccd8c 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -448,7 +448,7 @@ int cpu_exec(CPUState *env1) } #elif defined(TARGET_MIPS) if ((interrupt_request & CPU_INTERRUPT_HARD) && - (env->CP0_Status & env->CP0_Cause & CP0Ca_IP_mask) && + cpu_mips_hw_interrupts_pending(env) && (env->CP0_Status & (1 << CP0St_IE)) && !(env->CP0_Status & (1 << CP0St_EXL)) && !(env->CP0_Status & (1 << CP0St_ERL)) && diff --git a/docs/specs/ivshmem_device_spec.txt b/docs/specs/ivshmem_device_spec.txt new file mode 100644 index 0000000000..23dd2ba89f --- /dev/null +++ b/docs/specs/ivshmem_device_spec.txt @@ -0,0 +1,96 @@ + +Device Specification for Inter-VM shared memory device +------------------------------------------------------ + +The Inter-VM shared memory device is designed to share a region of memory to +userspace in multiple virtual guests. The memory region does not belong to any +guest, but is a POSIX memory object on the host. Optionally, the device may +support sending interrupts to other guests sharing the same memory region. + + +The Inter-VM PCI device +----------------------- + +*BARs* + +The device supports three BARs. BAR0 is a 1 Kbyte MMIO region to support +registers. BAR1 is used for MSI-X when it is enabled in the device. BAR2 is +used to map the shared memory object from the host. The size of BAR2 is +specified when the guest is started and must be a power of 2 in size. + +*Registers* + +The device currently supports 4 registers of 32-bits each. Registers +are used for synchronization between guests sharing the same memory object when +interrupts are supported (this requires using the shared memory server). + +The server assigns each VM an ID number and sends this ID number to the Qemu +process when the guest starts. + +enum ivshmem_registers { + IntrMask = 0, + IntrStatus = 4, + IVPosition = 8, + Doorbell = 12 +}; + +The first two registers are the interrupt mask and status registers. 
Mask and +status are only used with pin-based interrupts. They are unused with MSI +interrupts. + +Status Register: The status register is set to 1 when an interrupt occurs. + +Mask Register: The mask register is bitwise ANDed with the interrupt status +and the result will raise an interrupt if it is non-zero. However, since 1 is +the only value the status will be set to, it is only the first bit of the mask +that has any effect. Therefore interrupts can be masked by setting the first +bit to 0 and unmasked by setting the first bit to 1. + +IVPosition Register: The IVPosition register is read-only and reports the +guest's ID number. The guest IDs are non-negative integers. When using the +server, since the server is a separate process, the VM ID will only be set when +the device is ready (shared memory is received from the server and accessible via +the device). If the device is not ready, the IVPosition will return -1. +Applications should ensure that they have a valid VM ID before accessing the +shared memory. + +Doorbell Register: To interrupt another guest, a guest must write to the +Doorbell register. The doorbell register is 32-bits, logically divided into +two 16-bit fields. The high 16-bits are the guest ID to interrupt and the low +16-bits are the interrupt vector to trigger. The semantics of the value +written to the doorbell depends on whether the device is using MSI or a regular +pin-based interrupt. In short, MSI uses vectors while regular interrupts set the +status register. + +Regular Interrupts + +If regular interrupts are used (due to either a guest not supporting MSI or the +user specifying not to use them on startup) then the value written to the lower +16-bits of the Doorbell register is arbitrary and will trigger an +interrupt in the destination guest. + +Message Signalled Interrupts + +An ivshmem device may support multiple MSI vectors. 
If so, the lower 16-bits +written to the Doorbell register must be between 0 and the maximum number of +vectors the guest supports. The lower 16 bits written to the doorbell is the +MSI vector that will be raised in the destination guest. The number of MSI +vectors is configurable but it is set when the VM is started. + +The important thing to remember with MSI is that it is only a signal, no status +is set (since MSI interrupts are not shared). All information other than the +interrupt itself should be communicated via the shared memory region. Devices +supporting multiple MSI vectors can use different vectors to indicate different +events have occurred. The semantics of interrupt vectors are left to the +user's discretion. + + +Usage in the Guest +------------------ + +The shared memory device is intended to be used with the provided UIO driver. +Very little configuration is needed. The guest should map BAR0 to access the +registers (an array of 32-bit ints allows simple writing) and map BAR2 to +access the shared memory region itself. The size of the shared memory region +is specified when the guest (or shared memory server) is started. A guest may +map the whole shared memory region or only part of it. 
@@ -1706,8 +1706,8 @@ static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list = QLIST_HEAD_INITIALIZER(memory_client_list); static void cpu_notify_set_memory(target_phys_addr_t start_addr, - ram_addr_t size, - ram_addr_t phys_offset) + ram_addr_t size, + ram_addr_t phys_offset) { CPUPhysMemoryClient *client; QLIST_FOREACH(client, &memory_client_list, list) { @@ -1716,7 +1716,7 @@ static void cpu_notify_set_memory(target_phys_addr_t start_addr, } static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start, - target_phys_addr_t end) + target_phys_addr_t end) { CPUPhysMemoryClient *client; QLIST_FOREACH(client, &memory_client_list, list) { @@ -1803,17 +1803,17 @@ int cpu_str_to_log_mask(const char *str) p1 = strchr(p, ','); if (!p1) p1 = p + strlen(p); - if(cmp1(p,p1-p,"all")) { - for(item = cpu_log_items; item->mask != 0; item++) { - mask |= item->mask; - } - } else { - for(item = cpu_log_items; item->mask != 0; item++) { - if (cmp1(p, p1 - p, item->name)) - goto found; + if(cmp1(p,p1-p,"all")) { + for(item = cpu_log_items; item->mask != 0; item++) { + mask |= item->mask; + } + } else { + for(item = cpu_log_items; item->mask != 0; item++) { + if (cmp1(p, p1 - p, item->name)) + goto found; + } + return 0; } - return 0; - } found: mask |= item->mask; if (*p1 != ',') @@ -1907,11 +1907,11 @@ static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr) overlap the flushed page. 
*/ i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE); memset (&env->tb_jmp_cache[i], 0, - TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); + TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); i = tb_jmp_cache_hash_page(addr); memset (&env->tb_jmp_cache[i], 0, - TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); + TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); } static CPUTLBEntry s_cputlb_empty_entry = { @@ -2687,16 +2687,16 @@ static long gethugepagesize(const char *path) int ret; do { - ret = statfs(path, &fs); + ret = statfs(path, &fs); } while (ret != 0 && errno == EINTR); if (ret != 0) { - perror(path); - return 0; + perror(path); + return 0; } if (fs.f_type != HUGETLBFS_MAGIC) - fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path); + fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path); return fs.f_bsize; } @@ -2715,7 +2715,7 @@ static void *file_ram_alloc(RAMBlock *block, hpagesize = gethugepagesize(path); if (!hpagesize) { - return NULL; + return NULL; } if (memory < hpagesize) { @@ -2728,14 +2728,14 @@ static void *file_ram_alloc(RAMBlock *block, } if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) { - return NULL; + return NULL; } fd = mkstemp(filename); if (fd < 0) { - perror("unable to create backing store for hugepages"); - free(filename); - return NULL; + perror("unable to create backing store for hugepages"); + free(filename); + return NULL; } unlink(filename); free(filename); @@ -2749,7 +2749,7 @@ static void *file_ram_alloc(RAMBlock *block, * mmap will fail. 
*/ if (ftruncate(fd, memory)) - perror("ftruncate"); + perror("ftruncate"); #ifdef MAP_POPULATE /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case @@ -2762,9 +2762,9 @@ static void *file_ram_alloc(RAMBlock *block, area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); #endif if (area == MAP_FAILED) { - perror("file_ram_alloc: can't mmap RAM pages"); - close(fd); - return (NULL); + perror("file_ram_alloc: can't mmap RAM pages"); + close(fd); + return (NULL); } block->fd = fd; return area; @@ -2808,7 +2808,8 @@ static ram_addr_t last_ram_offset(void) return last; } -ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size) +ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, + ram_addr_t size, void *host) { RAMBlock *new_block, *block; @@ -2832,32 +2833,37 @@ ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size) } } - if (mem_path) { + if (host) { + new_block->host = host; + } else { + if (mem_path) { #if defined (__linux__) && !defined(TARGET_S390X) - new_block->host = file_ram_alloc(new_block, size, mem_path); - if (!new_block->host) { - new_block->host = qemu_vmalloc(size); + new_block->host = file_ram_alloc(new_block, size, mem_path); + if (!new_block->host) { + new_block->host = qemu_vmalloc(size); #ifdef MADV_MERGEABLE - madvise(new_block->host, size, MADV_MERGEABLE); + madvise(new_block->host, size, MADV_MERGEABLE); #endif - } + } #else - fprintf(stderr, "-mem-path option unsupported\n"); - exit(1); + fprintf(stderr, "-mem-path option unsupported\n"); + exit(1); #endif - } else { + } else { #if defined(TARGET_S390X) && defined(CONFIG_KVM) - /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */ - new_block->host = mmap((void*)0x1000000, size, - PROT_EXEC|PROT_READ|PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); + /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */ + new_block->host = mmap((void*)0x1000000, size, + 
PROT_EXEC|PROT_READ|PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); #else - new_block->host = qemu_vmalloc(size); + new_block->host = qemu_vmalloc(size); #endif #ifdef MADV_MERGEABLE - madvise(new_block->host, size, MADV_MERGEABLE); + madvise(new_block->host, size, MADV_MERGEABLE); #endif + } } + new_block->offset = find_ram_offset(size); new_block->length = size; @@ -2874,6 +2880,11 @@ ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size) return new_block->offset; } +ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size) +{ + return qemu_ram_alloc_from_ptr(dev, name, size, NULL); +} + void qemu_ram_free(ram_addr_t addr) { RAMBlock *block; @@ -65,6 +65,8 @@ * 2006-Aug-10 Igor Kovalenko : Renamed KBDQueue to SERIOQueue, implemented * serial mouse queue. * Implemented serial mouse protocol. + * + * 2010-May-23 Artyom Tarasenko: Reworked IUS logic */ #ifdef DEBUG_SERIAL @@ -279,7 +281,7 @@ static uint32_t get_queue(void *opaque) static int escc_update_irq_chn(ChannelState *s) { - if ((((s->wregs[W_INTR] & INTR_TXINT) && s->txint == 1) || + if ((((s->wregs[W_INTR] & INTR_TXINT) && (s->txint == 1)) || // tx ints enabled, pending ((((s->wregs[W_INTR] & INTR_RXMODEMSK) == INTR_RXINT1ST) || ((s->wregs[W_INTR] & INTR_RXMODEMSK) == INTR_RXINTALL)) && @@ -342,24 +344,22 @@ static void escc_reset(DeviceState *d) static inline void set_rxint(ChannelState *s) { s->rxint = 1; - if (!s->txint_under_svc) { - s->rxint_under_svc = 1; - if (s->chn == chn_a) { - if (s->wregs[W_MINTR] & MINTR_STATUSHI) - s->otherchn->rregs[R_IVEC] = IVEC_HIRXINTA; - else - s->otherchn->rregs[R_IVEC] = IVEC_LORXINTA; - } else { - if (s->wregs[W_MINTR] & MINTR_STATUSHI) - s->rregs[R_IVEC] = IVEC_HIRXINTB; - else - s->rregs[R_IVEC] = IVEC_LORXINTB; - } - } - if (s->chn == chn_a) + /* XXX: missing daisy chainnig: chn_b rx should have a lower priority + than chn_a rx/tx/special_condition service*/ + s->rxint_under_svc = 1; + if (s->chn == chn_a) { 
s->rregs[R_INTR] |= INTR_RXINTA; - else + if (s->wregs[W_MINTR] & MINTR_STATUSHI) + s->otherchn->rregs[R_IVEC] = IVEC_HIRXINTA; + else + s->otherchn->rregs[R_IVEC] = IVEC_LORXINTA; + } else { s->otherchn->rregs[R_INTR] |= INTR_RXINTB; + if (s->wregs[W_MINTR] & MINTR_STATUSHI) + s->rregs[R_IVEC] = IVEC_HIRXINTB; + else + s->rregs[R_IVEC] = IVEC_LORXINTB; + } escc_update_irq(s); } @@ -369,19 +369,17 @@ static inline void set_txint(ChannelState *s) if (!s->rxint_under_svc) { s->txint_under_svc = 1; if (s->chn == chn_a) { + s->rregs[R_INTR] |= INTR_TXINTA; if (s->wregs[W_MINTR] & MINTR_STATUSHI) s->otherchn->rregs[R_IVEC] = IVEC_HITXINTA; else s->otherchn->rregs[R_IVEC] = IVEC_LOTXINTA; } else { s->rregs[R_IVEC] = IVEC_TXINTB; + s->otherchn->rregs[R_INTR] |= INTR_TXINTB; } - } - if (s->chn == chn_a) - s->rregs[R_INTR] |= INTR_TXINTA; - else - s->otherchn->rregs[R_INTR] |= INTR_TXINTB; escc_update_irq(s); + } } static inline void clr_rxint(ChannelState *s) @@ -417,6 +415,7 @@ static inline void clr_txint(ChannelState *s) s->otherchn->rregs[R_IVEC] = IVEC_LONOINT; s->rregs[R_INTR] &= ~INTR_TXINTA; } else { + s->otherchn->rregs[R_INTR] &= ~INTR_TXINTB; if (s->wregs[W_MINTR] & MINTR_STATUSHI) s->rregs[R_IVEC] = IVEC_HINOINT; else @@ -515,10 +514,15 @@ static void escc_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val) clr_txint(s); break; case CMD_CLR_IUS: - if (s->rxint_under_svc) - clr_rxint(s); - else if (s->txint_under_svc) - clr_txint(s); + if (s->rxint_under_svc) { + s->rxint_under_svc = 0; + if (s->txint) { + set_txint(s); + } + } else if (s->txint_under_svc) { + s->txint_under_svc = 0; + } + escc_update_irq(s); break; default: break; @@ -264,6 +264,8 @@ int register_savevm_live(DeviceState *dev, void *opaque); void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque); +void register_device_unmigratable(DeviceState *dev, const char *idstr, + void *opaque); typedef void QEMUResetHandler(void *opaque); diff --git a/hw/ide/pci.c 
b/hw/ide/pci.c index 4331d77232..ec90f266e9 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -40,8 +40,27 @@ void bmdma_cmd_writeb(void *opaque, uint32_t addr, uint32_t val) printf("%s: 0x%08x\n", __func__, val); #endif if (!(val & BM_CMD_START)) { - /* XXX: do it better */ - ide_dma_cancel(bm); + /* + * We can't cancel Scatter Gather DMA in the middle of the + * operation or a partial (not full) DMA transfer would reach + * the storage so we wait for completion instead (we behave + * as if the DMA was completed by the time the guest is trying + * to cancel dma with bmdma_cmd_writeb with BM_CMD_START not + * set). + * + * In the future we'll be able to safely cancel the I/O if the + * whole DMA operation will be submitted to disk with a single + * aio operation with preadv/pwritev. + */ + if (bm->aiocb) { + qemu_aio_flush(); +#ifdef DEBUG_IDE + if (bm->aiocb) + printf("ide_dma_cancel: aiocb still pending"); + if (bm->status & BM_STATUS_DMAING) + printf("ide_dma_cancel: BM_STATUS_DMAING still pending"); +#endif + } bm->cmd = val & 0x09; } else { if (!(bm->status & BM_STATUS_DMAING)) { diff --git a/hw/ivshmem.c b/hw/ivshmem.c new file mode 100644 index 0000000000..bbb5cbaa16 --- /dev/null +++ b/hw/ivshmem.c @@ -0,0 +1,828 @@ +/* + * Inter-VM Shared Memory PCI device. + * + * Author: + * Cam Macdonell <cam@cs.ualberta.ca> + * + * Based On: cirrus_vga.c + * Copyright (c) 2004 Fabrice Bellard + * Copyright (c) 2004 Makoto Suzuki (suzu) + * + * and rtl8139.c + * Copyright (c) 2006 Igor Kovalenko + * + * This code is licensed under the GNU GPL v2. + */ +#include "hw.h" +#include "pc.h" +#include "pci.h" +#include "msix.h" +#include "kvm.h" + +#include <sys/mman.h> +#include <sys/types.h> + +#define IVSHMEM_IOEVENTFD 0 +#define IVSHMEM_MSI 1 + +#define IVSHMEM_PEER 0 +#define IVSHMEM_MASTER 1 + +#define IVSHMEM_REG_BAR_SIZE 0x100 + +//#define DEBUG_IVSHMEM +#ifdef DEBUG_IVSHMEM +#define IVSHMEM_DPRINTF(fmt, ...) 
\ + do {printf("IVSHMEM: " fmt, ## __VA_ARGS__); } while (0) +#else +#define IVSHMEM_DPRINTF(fmt, ...) +#endif + +typedef struct Peer { + int nb_eventfds; + int *eventfds; +} Peer; + +typedef struct EventfdEntry { + PCIDevice *pdev; + int vector; +} EventfdEntry; + +typedef struct IVShmemState { + PCIDevice dev; + uint32_t intrmask; + uint32_t intrstatus; + uint32_t doorbell; + + CharDriverState **eventfd_chr; + CharDriverState *server_chr; + int ivshmem_mmio_io_addr; + + pcibus_t mmio_addr; + pcibus_t shm_pci_addr; + uint64_t ivshmem_offset; + uint64_t ivshmem_size; /* size of shared memory region */ + int shm_fd; /* shared memory file descriptor */ + + Peer *peers; + int nb_peers; /* how many guests we have space for */ + int max_peer; /* maximum numbered peer */ + + int vm_id; + uint32_t vectors; + uint32_t features; + EventfdEntry *eventfd_table; + + char * shmobj; + char * sizearg; + char * role; + int role_val; /* scalar to avoid multiple string comparisons */ +} IVShmemState; + +/* registers for the Inter-VM shared memory device */ +enum ivshmem_registers { + INTRMASK = 0, + INTRSTATUS = 4, + IVPOSITION = 8, + DOORBELL = 12, +}; + +static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, + unsigned int feature) { + return (ivs->features & (1 << feature)); +} + +static inline bool is_power_of_two(uint64_t x) { + return (x & (x - 1)) == 0; +} + +static void ivshmem_map(PCIDevice *pci_dev, int region_num, + pcibus_t addr, pcibus_t size, int type) +{ + IVShmemState *s = DO_UPCAST(IVShmemState, dev, pci_dev); + + s->shm_pci_addr = addr; + + if (s->ivshmem_offset > 0) { + cpu_register_physical_memory(s->shm_pci_addr, s->ivshmem_size, + s->ivshmem_offset); + } + + IVSHMEM_DPRINTF("guest pci addr = %" FMT_PCIBUS ", guest h/w addr = %" + PRIu64 ", size = %" FMT_PCIBUS "\n", addr, s->ivshmem_offset, size); + +} + +/* accessing registers - based on rtl8139 */ +static void ivshmem_update_irq(IVShmemState *s, int val) +{ + int isr; + isr = (s->intrstatus & 
s->intrmask) & 0xffffffff; + + /* don't print ISR resets */ + if (isr) { + IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n", + isr ? 1 : 0, s->intrstatus, s->intrmask); + } + + qemu_set_irq(s->dev.irq[0], (isr != 0)); +} + +static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) +{ + IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val); + + s->intrmask = val; + + ivshmem_update_irq(s, val); +} + +static uint32_t ivshmem_IntrMask_read(IVShmemState *s) +{ + uint32_t ret = s->intrmask; + + IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret); + + return ret; +} + +static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val) +{ + IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val); + + s->intrstatus = val; + + ivshmem_update_irq(s, val); + return; +} + +static uint32_t ivshmem_IntrStatus_read(IVShmemState *s) +{ + uint32_t ret = s->intrstatus; + + /* reading ISR clears all interrupts */ + s->intrstatus = 0; + + ivshmem_update_irq(s, 0); + + return ret; +} + +static void ivshmem_io_writew(void *opaque, target_phys_addr_t addr, + uint32_t val) +{ + + IVSHMEM_DPRINTF("We shouldn't be writing words\n"); +} + +static void ivshmem_io_writel(void *opaque, target_phys_addr_t addr, + uint32_t val) +{ + IVShmemState *s = opaque; + + uint64_t write_one = 1; + uint16_t dest = val >> 16; + uint16_t vector = val & 0xff; + + addr &= 0xfc; + + IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr); + switch (addr) + { + case INTRMASK: + ivshmem_IntrMask_write(s, val); + break; + + case INTRSTATUS: + ivshmem_IntrStatus_write(s, val); + break; + + case DOORBELL: + /* check that dest VM ID is reasonable */ + if ((dest < 0) || (dest > s->max_peer)) { + IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest); + break; + } + + /* check doorbell range */ + if ((vector >= 0) && (vector < s->peers[dest].nb_eventfds)) { + IVSHMEM_DPRINTF("Writing %" PRId64 " to VM %d on vector %d\n", + write_one, dest, vector); + if (write(s->peers[dest].eventfds[vector], + 
&(write_one), 8) != 8) { + IVSHMEM_DPRINTF("error writing to eventfd\n"); + } + } + break; + default: + IVSHMEM_DPRINTF("Invalid VM Doorbell VM %d\n", dest); + } +} + +static void ivshmem_io_writeb(void *opaque, target_phys_addr_t addr, + uint32_t val) +{ + IVSHMEM_DPRINTF("We shouldn't be writing bytes\n"); +} + +static uint32_t ivshmem_io_readw(void *opaque, target_phys_addr_t addr) +{ + + IVSHMEM_DPRINTF("We shouldn't be reading words\n"); + return 0; +} + +static uint32_t ivshmem_io_readl(void *opaque, target_phys_addr_t addr) +{ + + IVShmemState *s = opaque; + uint32_t ret; + + switch (addr) + { + case INTRMASK: + ret = ivshmem_IntrMask_read(s); + break; + + case INTRSTATUS: + ret = ivshmem_IntrStatus_read(s); + break; + + case IVPOSITION: + /* return my VM ID if the memory is mapped */ + if (s->shm_fd > 0) { + ret = s->vm_id; + } else { + ret = -1; + } + break; + + default: + IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr); + ret = 0; + } + + return ret; +} + +static uint32_t ivshmem_io_readb(void *opaque, target_phys_addr_t addr) +{ + IVSHMEM_DPRINTF("We shouldn't be reading bytes\n"); + + return 0; +} + +static CPUReadMemoryFunc * const ivshmem_mmio_read[3] = { + ivshmem_io_readb, + ivshmem_io_readw, + ivshmem_io_readl, +}; + +static CPUWriteMemoryFunc * const ivshmem_mmio_write[3] = { + ivshmem_io_writeb, + ivshmem_io_writew, + ivshmem_io_writel, +}; + +static void ivshmem_receive(void *opaque, const uint8_t *buf, int size) +{ + IVShmemState *s = opaque; + + ivshmem_IntrStatus_write(s, *buf); + + IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf); +} + +static int ivshmem_can_receive(void * opaque) +{ + return 8; +} + +static void ivshmem_event(void *opaque, int event) +{ + IVSHMEM_DPRINTF("ivshmem_event %d\n", event); +} + +static void fake_irqfd(void *opaque, const uint8_t *buf, int size) { + + EventfdEntry *entry = opaque; + PCIDevice *pdev = entry->pdev; + + IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, entry->vector); + 
msix_notify(pdev, entry->vector); +} + +static CharDriverState* create_eventfd_chr_device(void * opaque, int eventfd, + int vector) +{ + /* create a event character device based on the passed eventfd */ + IVShmemState *s = opaque; + CharDriverState * chr; + + chr = qemu_chr_open_eventfd(eventfd); + + if (chr == NULL) { + fprintf(stderr, "creating eventfd for eventfd %d failed\n", eventfd); + exit(-1); + } + + /* if MSI is supported we need multiple interrupts */ + if (ivshmem_has_feature(s, IVSHMEM_MSI)) { + s->eventfd_table[vector].pdev = &s->dev; + s->eventfd_table[vector].vector = vector; + + qemu_chr_add_handlers(chr, ivshmem_can_receive, fake_irqfd, + ivshmem_event, &s->eventfd_table[vector]); + } else { + qemu_chr_add_handlers(chr, ivshmem_can_receive, ivshmem_receive, + ivshmem_event, s); + } + + return chr; + +} + +static int check_shm_size(IVShmemState *s, int fd) { + /* check that the guest isn't going to try and map more memory than the + * the object has allocated return -1 to indicate error */ + + struct stat buf; + + fstat(fd, &buf); + + if (s->ivshmem_size > buf.st_size) { + fprintf(stderr, "IVSHMEM ERROR: Requested memory size greater"); + fprintf(stderr, " than shared object size (%" PRIu64 " > %ld)\n", + s->ivshmem_size, buf.st_size); + return -1; + } else { + return 0; + } +} + +/* create the shared memory BAR when we are not using the server, so we can + * create the BAR and map the memory immediately */ +static void create_shared_memory_BAR(IVShmemState *s, int fd) { + + void * ptr; + + s->shm_fd = fd; + + ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + + s->ivshmem_offset = qemu_ram_alloc_from_ptr(&s->dev.qdev, "ivshmem.bar2", + s->ivshmem_size, ptr); + + /* region for shared memory */ + pci_register_bar(&s->dev, 2, s->ivshmem_size, + PCI_BASE_ADDRESS_SPACE_MEMORY, ivshmem_map); +} + +static void close_guest_eventfds(IVShmemState *s, int posn) +{ + int i, guest_curr_max; + + guest_curr_max = 
s->peers[posn].nb_eventfds; + + for (i = 0; i < guest_curr_max; i++) { + kvm_set_ioeventfd_mmio_long(s->peers[posn].eventfds[i], + s->mmio_addr + DOORBELL, (posn << 16) | i, 0); + close(s->peers[posn].eventfds[i]); + } + + qemu_free(s->peers[posn].eventfds); + s->peers[posn].nb_eventfds = 0; +} + +static void setup_ioeventfds(IVShmemState *s) { + + int i, j; + + for (i = 0; i <= s->max_peer; i++) { + for (j = 0; j < s->peers[i].nb_eventfds; j++) { + kvm_set_ioeventfd_mmio_long(s->peers[i].eventfds[j], + s->mmio_addr + DOORBELL, (i << 16) | j, 1); + } + } +} + +/* this function increase the dynamic storage need to store data about other + * guests */ +static void increase_dynamic_storage(IVShmemState *s, int new_min_size) { + + int j, old_nb_alloc; + + old_nb_alloc = s->nb_peers; + + while (new_min_size >= s->nb_peers) + s->nb_peers = s->nb_peers * 2; + + IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nb_peers); + s->peers = qemu_realloc(s->peers, s->nb_peers * sizeof(Peer)); + + /* zero out new pointers */ + for (j = old_nb_alloc; j < s->nb_peers; j++) { + s->peers[j].eventfds = NULL; + s->peers[j].nb_eventfds = 0; + } +} + +static void ivshmem_read(void *opaque, const uint8_t * buf, int flags) +{ + IVShmemState *s = opaque; + int incoming_fd, tmp_fd; + int guest_max_eventfd; + long incoming_posn; + + memcpy(&incoming_posn, buf, sizeof(long)); + /* pick off s->server_chr->msgfd and store it, posn should accompany msg */ + tmp_fd = qemu_chr_get_msgfd(s->server_chr); + IVSHMEM_DPRINTF("posn is %ld, fd is %d\n", incoming_posn, tmp_fd); + + /* make sure we have enough space for this guest */ + if (incoming_posn >= s->nb_peers) { + increase_dynamic_storage(s, incoming_posn); + } + + if (tmp_fd == -1) { + /* if posn is positive and unseen before then this is our posn*/ + if ((incoming_posn >= 0) && + (s->peers[incoming_posn].eventfds == NULL)) { + /* receive our posn */ + s->vm_id = incoming_posn; + return; + } else { + /* otherwise an fd == -1 means an existing 
guest has gone away */ + IVSHMEM_DPRINTF("posn %ld has gone away\n", incoming_posn); + close_guest_eventfds(s, incoming_posn); + return; + } + } + + /* because of the implementation of get_msgfd, we need a dup */ + incoming_fd = dup(tmp_fd); + + if (incoming_fd == -1) { + fprintf(stderr, "could not allocate file descriptor %s\n", + strerror(errno)); + return; + } + + /* if the position is -1, then it's shared memory region fd */ + if (incoming_posn == -1) { + + void * map_ptr; + + s->max_peer = 0; + + if (check_shm_size(s, incoming_fd) == -1) { + exit(-1); + } + + /* mmap the region and map into the BAR2 */ + map_ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED, + incoming_fd, 0); + s->ivshmem_offset = qemu_ram_alloc_from_ptr(&s->dev.qdev, + "ivshmem.bar2", s->ivshmem_size, map_ptr); + + IVSHMEM_DPRINTF("guest pci addr = %" FMT_PCIBUS ", guest h/w addr = %" + PRIu64 ", size = %" PRIu64 "\n", s->shm_pci_addr, + s->ivshmem_offset, s->ivshmem_size); + + if (s->shm_pci_addr > 0) { + /* map memory into BAR2 */ + cpu_register_physical_memory(s->shm_pci_addr, s->ivshmem_size, + s->ivshmem_offset); + } + + /* only store the fd if it is successfully mapped */ + s->shm_fd = incoming_fd; + + return; + } + + /* each guest has an array of eventfds, and we keep track of how many + * guests for each VM */ + guest_max_eventfd = s->peers[incoming_posn].nb_eventfds; + + if (guest_max_eventfd == 0) { + /* one eventfd per MSI vector */ + s->peers[incoming_posn].eventfds = (int *) qemu_malloc(s->vectors * + sizeof(int)); + } + + /* this is an eventfd for a particular guest VM */ + IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn, + guest_max_eventfd, incoming_fd); + s->peers[incoming_posn].eventfds[guest_max_eventfd] = incoming_fd; + + /* increment count for particular guest */ + s->peers[incoming_posn].nb_eventfds++; + + /* keep track of the maximum VM ID */ + if (incoming_posn > s->max_peer) { + s->max_peer = incoming_posn; + } + + if (incoming_posn == 
s->vm_id) { + s->eventfd_chr[guest_max_eventfd] = create_eventfd_chr_device(s, + s->peers[s->vm_id].eventfds[guest_max_eventfd], + guest_max_eventfd); + } + + if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { + if (kvm_set_ioeventfd_mmio_long(incoming_fd, s->mmio_addr + DOORBELL, + (incoming_posn << 16) | guest_max_eventfd, 1) < 0) { + fprintf(stderr, "ivshmem: ioeventfd not available\n"); + } + } + + return; +} + +static void ivshmem_reset(DeviceState *d) +{ + IVShmemState *s = DO_UPCAST(IVShmemState, dev.qdev, d); + + s->intrstatus = 0; + return; +} + +static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num, + pcibus_t addr, pcibus_t size, int type) +{ + IVShmemState *s = DO_UPCAST(IVShmemState, dev, pci_dev); + + s->mmio_addr = addr; + cpu_register_physical_memory(addr + 0, IVSHMEM_REG_BAR_SIZE, + s->ivshmem_mmio_io_addr); + + if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { + setup_ioeventfds(s); + } +} + +static uint64_t ivshmem_get_size(IVShmemState * s) { + + uint64_t value; + char *ptr; + + value = strtoull(s->sizearg, &ptr, 10); + switch (*ptr) { + case 0: case 'M': case 'm': + value <<= 20; + break; + case 'G': case 'g': + value <<= 30; + break; + default: + fprintf(stderr, "qemu: invalid ram size: %s\n", s->sizearg); + exit(1); + } + + /* BARs must be a power of 2 */ + if (!is_power_of_two(value)) { + fprintf(stderr, "ivshmem: size must be power of 2\n"); + exit(1); + } + + return value; +} + +static void ivshmem_setup_msi(IVShmemState * s) { + + int i; + + /* allocate the MSI-X vectors */ + + if (!msix_init(&s->dev, s->vectors, 1, 0)) { + pci_register_bar(&s->dev, 1, + msix_bar_size(&s->dev), + PCI_BASE_ADDRESS_SPACE_MEMORY, + msix_mmio_map); + IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors); + } else { + IVSHMEM_DPRINTF("msix initialization failed\n"); + exit(1); + } + + /* 'activate' the vectors */ + for (i = 0; i < s->vectors; i++) { + msix_vector_use(&s->dev, i); + } + + /* allocate Qemu char devices for receiving interrupts */ 
+ s->eventfd_table = qemu_mallocz(s->vectors * sizeof(EventfdEntry)); +} + +static void ivshmem_save(QEMUFile* f, void *opaque) +{ + IVShmemState *proxy = opaque; + + IVSHMEM_DPRINTF("ivshmem_save\n"); + pci_device_save(&proxy->dev, f); + + if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) { + msix_save(&proxy->dev, f); + } else { + qemu_put_be32(f, proxy->intrstatus); + qemu_put_be32(f, proxy->intrmask); + } + +} + +static int ivshmem_load(QEMUFile* f, void *opaque, int version_id) +{ + IVSHMEM_DPRINTF("ivshmem_load\n"); + + IVShmemState *proxy = opaque; + int ret, i; + + if (version_id > 0) { + return -EINVAL; + } + + if (proxy->role_val == IVSHMEM_PEER) { + fprintf(stderr, "ivshmem: 'peer' devices are not migratable\n"); + return -EINVAL; + } + + ret = pci_device_load(&proxy->dev, f); + if (ret) { + return ret; + } + + if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) { + msix_load(&proxy->dev, f); + for (i = 0; i < proxy->vectors; i++) { + msix_vector_use(&proxy->dev, i); + } + } else { + proxy->intrstatus = qemu_get_be32(f); + proxy->intrmask = qemu_get_be32(f); + } + + return 0; +} + +static int pci_ivshmem_init(PCIDevice *dev) +{ + IVShmemState *s = DO_UPCAST(IVShmemState, dev, dev); + uint8_t *pci_conf; + + if (s->sizearg == NULL) + s->ivshmem_size = 4 << 20; /* 4 MB default */ + else { + s->ivshmem_size = ivshmem_get_size(s); + } + + register_savevm(&s->dev.qdev, "ivshmem", 0, 0, ivshmem_save, ivshmem_load, + dev); + + /* IRQFD requires MSI */ + if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) && + !ivshmem_has_feature(s, IVSHMEM_MSI)) { + fprintf(stderr, "ivshmem: ioeventfd/irqfd requires MSI\n"); + exit(1); + } + + /* check that role is reasonable */ + if (s->role) { + if (strncmp(s->role, "peer", 5) == 0) { + s->role_val = IVSHMEM_PEER; + } else if (strncmp(s->role, "master", 7) == 0) { + s->role_val = IVSHMEM_MASTER; + } else { + fprintf(stderr, "ivshmem: 'role' must be 'peer' or 'master'\n"); + exit(1); + } + } else { + s->role_val = IVSHMEM_MASTER; /* default */ 
+ } + + if (s->role_val == IVSHMEM_PEER) { + register_device_unmigratable(&s->dev.qdev, "ivshmem", s); + } + + pci_conf = s->dev.config; + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT_QUMRANET); + pci_conf[0x02] = 0x10; + pci_conf[0x03] = 0x11; + pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; + pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_RAM); + pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; + + pci_config_set_interrupt_pin(pci_conf, 1); + + s->shm_pci_addr = 0; + s->ivshmem_offset = 0; + s->shm_fd = 0; + + s->ivshmem_mmio_io_addr = cpu_register_io_memory(ivshmem_mmio_read, + ivshmem_mmio_write, s); + /* region for registers*/ + pci_register_bar(&s->dev, 0, IVSHMEM_REG_BAR_SIZE, + PCI_BASE_ADDRESS_SPACE_MEMORY, ivshmem_mmio_map); + + if ((s->server_chr != NULL) && + (strncmp(s->server_chr->filename, "unix:", 5) == 0)) { + /* if we get a UNIX socket as the parameter we will talk + * to the ivshmem server to receive the memory region */ + + if (s->shmobj != NULL) { + fprintf(stderr, "WARNING: do not specify both 'chardev' " + "and 'shm' with ivshmem\n"); + } + + IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", + s->server_chr->filename); + + if (ivshmem_has_feature(s, IVSHMEM_MSI)) { + ivshmem_setup_msi(s); + } + + /* we allocate enough space for 16 guests and grow as needed */ + s->nb_peers = 16; + s->vm_id = -1; + + /* allocate/initialize space for interrupt handling */ + s->peers = qemu_mallocz(s->nb_peers * sizeof(Peer)); + + pci_register_bar(&s->dev, 2, s->ivshmem_size, + PCI_BASE_ADDRESS_SPACE_MEMORY, ivshmem_map); + + s->eventfd_chr = qemu_mallocz(s->vectors * sizeof(CharDriverState *)); + + qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read, + ivshmem_event, s); + } else { + /* just map the file immediately, we're not using a server */ + int fd; + + if (s->shmobj == NULL) { + fprintf(stderr, "Must specify 'chardev' or 'shm' to ivshmem\n"); + } + + IVSHMEM_DPRINTF("using shm_open (shm object = 
%s)\n", s->shmobj); + + /* try opening with O_EXCL and if it succeeds zero the memory + * by truncating to 0 */ + if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR|O_EXCL, + S_IRWXU|S_IRWXG|S_IRWXO)) > 0) { + /* truncate file to length PCI device's memory */ + if (ftruncate(fd, s->ivshmem_size) != 0) { + fprintf(stderr, "ivshmem: could not truncate shared file\n"); + } + + } else if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR, + S_IRWXU|S_IRWXG|S_IRWXO)) < 0) { + fprintf(stderr, "ivshmem: could not open shared file\n"); + exit(-1); + + } + + if (check_shm_size(s, fd) == -1) { + exit(-1); + } + + create_shared_memory_BAR(s, fd); + + } + + return 0; +} + +static int pci_ivshmem_uninit(PCIDevice *dev) +{ + IVShmemState *s = DO_UPCAST(IVShmemState, dev, dev); + + cpu_unregister_io_memory(s->ivshmem_mmio_io_addr); + unregister_savevm(&dev->qdev, "ivshmem", s); + + return 0; +} + +static PCIDeviceInfo ivshmem_info = { + .qdev.name = "ivshmem", + .qdev.size = sizeof(IVShmemState), + .qdev.reset = ivshmem_reset, + .init = pci_ivshmem_init, + .exit = pci_ivshmem_uninit, + .qdev.props = (Property[]) { + DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), + DEFINE_PROP_STRING("size", IVShmemState, sizearg), + DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), + DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, false), + DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true), + DEFINE_PROP_STRING("shm", IVShmemState, shmobj), + DEFINE_PROP_STRING("role", IVShmemState, role), + DEFINE_PROP_END_OF_LIST(), + } +}; + +static void ivshmem_register_devices(void) +{ + pci_qdev_register(&ivshmem_info); +} + +device_init(ivshmem_register_devices) diff --git a/hw/pci-hotplug.c b/hw/pci-hotplug.c index c38f47fbf1..6a5e3b883b 100644 --- a/hw/pci-hotplug.c +++ b/hw/pci-hotplug.c @@ -51,7 +51,7 @@ static PCIDevice *qemu_pci_hot_add_nic(Monitor *mon, return NULL; } - opts = qemu_opts_parse(&qemu_net_opts, opts_str ? 
opts_str : "", 0); + opts = qemu_opts_parse(qemu_find_opts("net"), opts_str ? opts_str : "", 0); if (!opts) { return NULL; } diff --git a/hw/pckbd.c b/hw/pckbd.c index 0533b1d9e3..6e4e4062ad 100644 --- a/hw/pckbd.c +++ b/hw/pckbd.c @@ -56,7 +56,9 @@ #define KBD_CCMD_WRITE_MOUSE 0xD4 /* Write the following byte to the mouse */ #define KBD_CCMD_DISABLE_A20 0xDD /* HP vectra only ? */ #define KBD_CCMD_ENABLE_A20 0xDF /* HP vectra only ? */ -#define KBD_CCMD_RESET 0xFE +#define KBD_CCMD_PULSE_BITS_3_0 0xF0 /* Pulse bits 3-0 of the output port P2. */ +#define KBD_CCMD_RESET 0xFE /* Pulse bit 0 of the output port P2 = CPU reset. */ +#define KBD_CCMD_NO_OP 0xFF /* Pulse no bits of the output port P2. */ /* Keyboard Commands */ #define KBD_CMD_SET_LEDS 0xED /* Set keyboard leds */ @@ -238,6 +240,21 @@ static void kbd_write_command(void *opaque, uint32_t addr, uint32_t val) KBDState *s = opaque; DPRINTF("kbd: write cmd=0x%02x\n", val); + + /* Bits 3-0 of the output port P2 of the keyboard controller may be pulsed + * low for approximately 6 micro seconds. Bits 3-0 of the KBD_CCMD_PULSE + * command specify the output port bits to be pulsed. + * 0: Bit should be pulsed. 1: Bit should not be modified. + * The only useful version of this command is pulsing bit 0, + * which does a CPU reset. 
+ */ + if((val & KBD_CCMD_PULSE_BITS_3_0) == KBD_CCMD_PULSE_BITS_3_0) { + if(!(val & 1)) + val = KBD_CCMD_RESET; + else + val = KBD_CCMD_NO_OP; + } + switch(val) { case KBD_CCMD_READ_MODE: kbd_queue(s, s->mode, 0); @@ -294,8 +311,8 @@ static void kbd_write_command(void *opaque, uint32_t addr, uint32_t val) case KBD_CCMD_RESET: qemu_system_reset_request(); break; - case 0xff: - /* ignore that - I don't know what is its use */ + case KBD_CCMD_NO_OP: + /* ignore that */ break; default: fprintf(stderr, "qemu: unsupported keyboard cmd=0x%02x\n", val); diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c index 9219cd7a60..2d600f5eeb 100644 --- a/hw/qdev-properties.c +++ b/hw/qdev-properties.c @@ -772,5 +772,5 @@ static int qdev_add_one_global(QemuOpts *opts, void *opaque) void qemu_add_globals(void) { - qemu_opts_foreach(&qemu_global_opts, qdev_add_one_global, NULL, 0); + qemu_opts_foreach(qemu_find_opts("global"), qdev_add_one_global, NULL, 0); } @@ -792,7 +792,7 @@ int do_device_add(Monitor *mon, const QDict *qdict, QObject **ret_data) { QemuOpts *opts; - opts = qemu_opts_from_qdict(&qemu_device_opts, qdict); + opts = qemu_opts_from_qdict(qemu_find_opts("device"), qdict); if (!opts) { return -1; } diff --git a/hw/sh_intc.c b/hw/sh_intc.c index da36d32b1d..d3f5ea57d5 100644 --- a/hw/sh_intc.c +++ b/hw/sh_intc.c @@ -431,9 +431,8 @@ int sh_intc_init(struct intc_desc *desc, desc->nr_prio_regs = nr_prio_regs; i = sizeof(struct intc_source) * nr_sources; - desc->sources = qemu_malloc(i); + desc->sources = qemu_mallocz(i); - memset(desc->sources, 0, i); for (i = 0; i < desc->nr_sources; i++) { struct intc_source *source = desc->sources + i; diff --git a/hw/usb-msd.c b/hw/usb-msd.c index 65e9624e54..8b510cf90d 100644 --- a/hw/usb-msd.c +++ b/hw/usb-msd.c @@ -575,7 +575,7 @@ static USBDevice *usb_msd_init(const char *filename) /* parse -usbdevice disk: syntax into drive opts */ snprintf(id, sizeof(id), "usb%d", nr++); - opts = qemu_opts_create(&qemu_drive_opts, id, 0); + 
opts = qemu_opts_create(qemu_find_opts("drive"), id, 0); p1 = strchr(filename, ':'); if (p1++) { diff --git a/hw/usb-net.c b/hw/usb-net.c index a43bd17636..70f9263291 100644 --- a/hw/usb-net.c +++ b/hw/usb-net.c @@ -1472,7 +1472,7 @@ static USBDevice *usb_net_init(const char *cmdline) QemuOpts *opts; int idx; - opts = qemu_opts_parse(&qemu_net_opts, cmdline, 0); + opts = qemu_opts_parse(qemu_find_opts("net"), cmdline, 0); if (!opts) { return NULL; } @@ -2313,13 +2313,6 @@ void vga_init(VGACommonState *s) register_ioport_write(0x1ce, 1, 2, vbe_ioport_write_index, s); register_ioport_write(0x1cf, 1, 2, vbe_ioport_write_data, s); - - /* old Bochs IO ports */ - register_ioport_read(0xff80, 1, 2, vbe_ioport_read_index, s); - register_ioport_read(0xff81, 1, 2, vbe_ioport_read_data, s); - - register_ioport_write(0xff80, 1, 2, vbe_ioport_write_index, s); - register_ioport_write(0xff81, 1, 2, vbe_ioport_write_data, s); #else register_ioport_read(0x1ce, 1, 2, vbe_ioport_read_index, s); register_ioport_read(0x1d0, 1, 2, vbe_ioport_read_data, s); diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c index 04f7f6f501..43c03c188f 100644 --- a/hw/virtio-9p-local.c +++ b/hw/virtio-9p-local.c @@ -426,9 +426,6 @@ static int local_rename(FsContext *ctx, const char *oldpath, int err; tmp = qemu_strdup(rpath(ctx, oldpath)); - if (tmp == NULL) { - return -1; - } err = rename(tmp, rpath(ctx, newpath)); if (err == -1) { diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index f8c85c3d28..047c7ea4eb 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -1969,9 +1969,8 @@ static void v9fs_wstat_post_chown(V9fsState *s, V9fsWstatState *vs, int err) end = old_name; } - new_name = qemu_malloc(end - old_name + vs->v9stat.name.size + 1); + new_name = qemu_mallocz(end - old_name + vs->v9stat.name.size + 1); - memset(new_name, 0, end - old_name + vs->v9stat.name.size + 1); memcpy(new_name, old_name, end - old_name); memcpy(new_name + (end - old_name), vs->v9stat.name.data, vs->v9stat.name.size); diff 
--git a/hw/virtio-pci.c b/hw/virtio-pci.c index 17c3d1539b..82a6d78a9b 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -599,6 +599,14 @@ static int virtio_serial_init_pci(PCIDevice *pci_dev) return 0; } +static int virtio_serial_exit_pci(PCIDevice *pci_dev) +{ + VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev); + + virtio_serial_exit(proxy->vdev); + return virtio_exit_pci(pci_dev); +} + static int virtio_net_init_pci(PCIDevice *pci_dev) { VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev); @@ -689,7 +697,7 @@ static PCIDeviceInfo virtio_info[] = { .qdev.alias = "virtio-serial", .qdev.size = sizeof(VirtIOPCIProxy), .init = virtio_serial_init_pci, - .exit = virtio_exit_pci, + .exit = virtio_serial_exit_pci, .qdev.props = (Property[]) { DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, DEV_NVECTORS_UNSPECIFIED), diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c index 0586b8985d..74ba5ec3d3 100644 --- a/hw/virtio-serial-bus.c +++ b/hw/virtio-serial-bus.c @@ -41,6 +41,8 @@ struct VirtIOSerial { VirtIOSerialBus *bus; + DeviceState *qdev; + QTAILQ_HEAD(, VirtIOSerialPort) ports; /* bitmap for identifying active ports */ @@ -792,6 +794,8 @@ VirtIODevice *virtio_serial_init(DeviceState *dev, uint32_t max_nr_ports) vser->vdev.get_config = get_config; vser->vdev.set_config = set_config; + vser->qdev = dev; + /* * Register for the savevm section with the virtio-console name * to preserve backward compat @@ -801,3 +805,16 @@ VirtIODevice *virtio_serial_init(DeviceState *dev, uint32_t max_nr_ports) return vdev; } + +void virtio_serial_exit(VirtIODevice *vdev) +{ + VirtIOSerial *vser = DO_UPCAST(VirtIOSerial, vdev, vdev); + + unregister_savevm(vser->qdev, "virtio-console", vser); + + qemu_free(vser->ivqs); + qemu_free(vser->ovqs); + qemu_free(vser->ports_map); + + virtio_cleanup(vdev); +} diff --git a/hw/virtio.h b/hw/virtio.h index 30e472aba7..5836ab61e7 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -195,6 +195,7 @@ 
VirtIODevice *virtio_9p_init(DeviceState *dev, V9fsConf *conf); void virtio_net_exit(VirtIODevice *vdev); void virtio_blk_exit(VirtIODevice *vdev); +void virtio_serial_exit(VirtIODevice *vdev); #define DEFINE_VIRTIO_COMMON_FEATURES(_state, _field) \ DEFINE_PROP_BIT("indirect_desc", _state, _field, \ diff --git a/hw/watchdog.c b/hw/watchdog.c index aebb08a0ee..e9dd56e229 100644 --- a/hw/watchdog.c +++ b/hw/watchdog.c @@ -66,7 +66,7 @@ int select_watchdog(const char *p) QLIST_FOREACH(model, &watchdog_list, entry) { if (strcasecmp(model->wdt_name, p) == 0) { /* add the device */ - opts = qemu_opts_create(&qemu_device_opts, NULL, 0); + opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0); qemu_opt_set(opts, "driver", p); return 0; } @@ -1236,6 +1236,38 @@ int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset) return r; } +int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign) +{ +#ifdef KVM_IOEVENTFD + int ret; + struct kvm_ioeventfd iofd; + + iofd.datamatch = val; + iofd.addr = addr; + iofd.len = 4; + iofd.flags = KVM_IOEVENTFD_FLAG_DATAMATCH; + iofd.fd = fd; + + if (!kvm_enabled()) { + return -ENOSYS; + } + + if (!assign) { + iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN; + } + + ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd); + + if (ret < 0) { + return -errno; + } + + return 0; +#else + return -ENOSYS; +#endif +} + int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign) { #ifdef KVM_IOEVENTFD diff --git a/kvm-stub.c b/kvm-stub.c index 3378bd3b21..d45f9fa1df 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -136,3 +136,8 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign) { return -ENOSYS; } + +int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign) +{ + return -ENOSYS; +} @@ -175,6 +175,7 @@ static inline void cpu_synchronize_post_init(CPUState *env) } #endif +int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign); int 
kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign); #endif diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 46cb05ea9f..6c57e2481a 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -1282,7 +1282,10 @@ struct target_stat { /* FIXME: Microblaze no-mmu user-space has a difference stat64 layout... */ struct __attribute__((__packed__)) target_stat64 { uint64_t st_dev; - uint64_t st_ino; +#define TARGET_STAT64_HAS_BROKEN_ST_INO 1 + uint32_t pad0; + uint32_t __st_ino; + uint32_t st_mode; uint32_t st_nlink; uint32_t st_uid; @@ -1296,13 +1299,12 @@ struct __attribute__((__packed__)) target_stat64 { int64_t st_blocks; /* Number 512-byte blocks allocated. */ int target_st_atime; - unsigned int target_st_atime_nsec; + unsigned int target_st_atime_nsec; int target_st_mtime; - unsigned int target_st_mtime_nsec; + unsigned int target_st_mtime_nsec; int target_st_ctime; - unsigned int target_st_ctime_nsec; - uint32_t __unused4; - uint32_t __unused5; + unsigned int target_st_ctime_nsec; + uint64_t st_ino; }; #elif defined(TARGET_M68K) diff --git a/migration.c b/migration.c index a160462dfa..468d51749f 100644 --- a/migration.c +++ b/migration.c @@ -316,8 +316,14 @@ ssize_t migrate_fd_put_buffer(void *opaque, const void *data, size_t size) if (ret == -1) ret = -(s->get_error(s)); - if (ret == -EAGAIN) + if (ret == -EAGAIN) { qemu_set_fd_handler2(s->fd, NULL, NULL, migrate_fd_put_notify, s); + } else if (ret < 0) { + if (s->mon) { + monitor_resume(s->mon); + } + s->state = MIG_STATE_ERROR; + } return ret; } @@ -669,17 +669,32 @@ help: static void do_info_version_print(Monitor *mon, const QObject *data) { QDict *qdict; + QDict *qemu; qdict = qobject_to_qdict(data); + qemu = qdict_get_qdict(qdict, "qemu"); - monitor_printf(mon, "%s%s\n", qdict_get_str(qdict, "qemu"), - qdict_get_str(qdict, "package")); + monitor_printf(mon, "%" PRId64 ".%" PRId64 ".%" PRId64 "%s\n", + qdict_get_int(qemu, "major"), + 
qdict_get_int(qemu, "minor"), + qdict_get_int(qemu, "micro"), + qdict_get_str(qdict, "package")); } static void do_info_version(Monitor *mon, QObject **ret_data) { - *ret_data = qobject_from_jsonf("{ 'qemu': %s, 'package': %s }", - QEMU_VERSION, QEMU_PKGVERSION); + const char *version = QEMU_VERSION; + int major = 0, minor = 0, micro = 0; + char *tmp; + + major = strtol(version, &tmp, 10); + tmp++; + minor = strtol(tmp, &tmp, 10); + tmp++; + micro = strtol(tmp, &tmp, 10); + + *ret_data = qobject_from_jsonf("{ 'qemu': { 'major': %d, 'minor': %d, \ + 'micro': %d }, 'package': %s }", major, minor, micro, QEMU_PKGVERSION); } static void do_info_name_print(Monitor *mon, const QObject *data) @@ -2274,8 +2289,9 @@ static void do_loadvm(Monitor *mon, const QDict *qdict) vm_stop(0); - if (load_vmstate(name) >= 0 && saved_vm_running) + if (load_vmstate(name) == 0 && saved_vm_running) { vm_start(); + } } int monitor_get_fd(Monitor *mon, const char *fdname) @@ -1168,7 +1168,7 @@ void net_host_device_add(Monitor *mon, const QDict *qdict) return; } - opts = qemu_opts_parse(&qemu_net_opts, opts_str ? opts_str : "", 0); + opts = qemu_opts_parse(qemu_find_opts("net"), opts_str ? 
opts_str : "", 0); if (!opts) { return; } @@ -1202,7 +1202,7 @@ int do_netdev_add(Monitor *mon, const QDict *qdict, QObject **ret_data) QemuOpts *opts; int res; - opts = qemu_opts_from_qdict(&qemu_netdev_opts, qdict); + opts = qemu_opts_from_qdict(qemu_find_opts("netdev"), qdict); if (!opts) { return -1; } @@ -1226,7 +1226,7 @@ int do_netdev_del(Monitor *mon, const QDict *qdict, QObject **ret_data) return -1; } qemu_del_vlan_client(vc); - qemu_opts_del(qemu_opts_find(&qemu_netdev_opts, id)); + qemu_opts_del(qemu_opts_find(qemu_find_opts("netdev"), id)); return 0; } @@ -1349,21 +1349,23 @@ static int net_init_netdev(QemuOpts *opts, void *dummy) int net_init_clients(void) { + QemuOptsList *net = qemu_find_opts("net"); + if (default_net) { /* if no clients, we use a default config */ - qemu_opts_set(&qemu_net_opts, NULL, "type", "nic"); + qemu_opts_set(net, NULL, "type", "nic"); #ifdef CONFIG_SLIRP - qemu_opts_set(&qemu_net_opts, NULL, "type", "user"); + qemu_opts_set(net, NULL, "type", "user"); #endif } QTAILQ_INIT(&vlans); QTAILQ_INIT(&non_vlan_clients); - if (qemu_opts_foreach(&qemu_netdev_opts, net_init_netdev, NULL, 1) == -1) + if (qemu_opts_foreach(qemu_find_opts("netdev"), net_init_netdev, NULL, 1) == -1) return -1; - if (qemu_opts_foreach(&qemu_net_opts, net_init_client, NULL, 1) == -1) { + if (qemu_opts_foreach(net, net_init_client, NULL, 1) == -1) { return -1; } diff --git a/os-posix.c b/os-posix.c index 00133a0c76..6321e990c5 100644 --- a/os-posix.c +++ b/os-posix.c @@ -110,7 +110,7 @@ char *os_find_datadir(const char *argv0) size_t len = sizeof(buf) - 1; *buf = '\0'; - if (!sysctl(mib, sizeof(mib)/sizeof(*mib), buf, &len, NULL, 0) && + if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) && *buf) { buf[sizeof(buf) - 1] = '\0'; p = buf; diff --git a/os-win32.c b/os-win32.c index d98fd77c12..dd46bf459d 100644 --- a/os-win32.c +++ b/os-win32.c @@ -34,6 +34,21 @@ #include "qemu-options.h" /***********************************************************/ +/* 
Functions missing in mingw */ + +int setenv(const char *name, const char *value, int overwrite) +{ + int result = 0; + if (overwrite || !getenv(name)) { + size_t length = strlen(name) + strlen(value) + 2; + char *string = qemu_malloc(length); + snprintf(string, length, "%s=%s", name, value); + result = putenv(string); + } + return result; +} + +/***********************************************************/ /* Polling handling */ typedef struct PollingEntry { @@ -95,6 +95,8 @@ int qemu_create_pidfile(const char *filename); #ifdef _WIN32 int ffs(int i); +int setenv(const char *name, const char *value, int overwrite); + typedef struct { long tv_sec; long tv_usec; diff --git a/qemu-char.c b/qemu-char.c index 9b69d928ef..6d2dce7a9b 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -2087,6 +2087,13 @@ static void tcp_chr_read(void *opaque) } } +#ifndef _WIN32 +CharDriverState *qemu_chr_open_eventfd(int eventfd) +{ + return qemu_chr_open_fd(eventfd, eventfd); +} +#endif + static void tcp_chr_connect(void *opaque) { CharDriverState *chr = opaque; @@ -2279,7 +2286,7 @@ QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename) const char *p; QemuOpts *opts; - opts = qemu_opts_create(&qemu_chardev_opts, label, 1); + opts = qemu_opts_create(qemu_find_opts("chardev"), label, 1); if (NULL == opts) return NULL; diff --git a/qemu-char.h b/qemu-char.h index e3a07838a4..6ea01ba179 100644 --- a/qemu-char.h +++ b/qemu-char.h @@ -94,6 +94,9 @@ void qemu_chr_info_print(Monitor *mon, const QObject *ret_data); void qemu_chr_info(Monitor *mon, QObject **ret_data); CharDriverState *qemu_chr_find(const char *name); +/* add an eventfd to the qemu devices that are polled */ +CharDriverState *qemu_chr_open_eventfd(int eventfd); + extern int term_escape_char; /* async I/O support */ diff --git a/qemu-config.c b/qemu-config.c index 95abe61fab..3abe6555c2 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -5,7 +5,7 @@ #include "sysemu.h" #include "hw/qdev.h" -QemuOptsList qemu_drive_opts 
= { +static QemuOptsList qemu_drive_opts = { .name = "drive", .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head), .desc = { @@ -84,7 +84,7 @@ QemuOptsList qemu_drive_opts = { }, }; -QemuOptsList qemu_chardev_opts = { +static QemuOptsList qemu_chardev_opts = { .name = "chardev", .implied_opt_name = "backend", .head = QTAILQ_HEAD_INITIALIZER(qemu_chardev_opts.head), @@ -151,7 +151,6 @@ QemuOptsList qemu_chardev_opts = { }, }; -#ifdef CONFIG_LINUX QemuOptsList qemu_fsdev_opts = { .name = "fsdev", .implied_opt_name = "fstype", @@ -170,9 +169,7 @@ QemuOptsList qemu_fsdev_opts = { { /*End of list */ } }, }; -#endif -#ifdef CONFIG_LINUX QemuOptsList qemu_virtfs_opts = { .name = "virtfs", .implied_opt_name = "fstype", @@ -195,9 +192,8 @@ QemuOptsList qemu_virtfs_opts = { { /*End of list */ } }, }; -#endif -QemuOptsList qemu_device_opts = { +static QemuOptsList qemu_device_opts = { .name = "device", .implied_opt_name = "driver", .head = QTAILQ_HEAD_INITIALIZER(qemu_device_opts.head), @@ -211,7 +207,7 @@ QemuOptsList qemu_device_opts = { }, }; -QemuOptsList qemu_netdev_opts = { +static QemuOptsList qemu_netdev_opts = { .name = "netdev", .implied_opt_name = "type", .head = QTAILQ_HEAD_INITIALIZER(qemu_netdev_opts.head), @@ -224,7 +220,7 @@ QemuOptsList qemu_netdev_opts = { }, }; -QemuOptsList qemu_net_opts = { +static QemuOptsList qemu_net_opts = { .name = "net", .implied_opt_name = "type", .head = QTAILQ_HEAD_INITIALIZER(qemu_net_opts.head), @@ -237,7 +233,7 @@ QemuOptsList qemu_net_opts = { }, }; -QemuOptsList qemu_rtc_opts = { +static QemuOptsList qemu_rtc_opts = { .name = "rtc", .head = QTAILQ_HEAD_INITIALIZER(qemu_rtc_opts.head), .desc = { @@ -247,17 +243,15 @@ QemuOptsList qemu_rtc_opts = { },{ .name = "clock", .type = QEMU_OPT_STRING, -#ifdef TARGET_I386 },{ .name = "driftfix", .type = QEMU_OPT_STRING, -#endif }, { /* end if list */ } }, }; -QemuOptsList qemu_global_opts = { +static QemuOptsList qemu_global_opts = { .name = "global", .head = 
QTAILQ_HEAD_INITIALIZER(qemu_global_opts.head), .desc = { @@ -275,7 +269,7 @@ QemuOptsList qemu_global_opts = { }, }; -QemuOptsList qemu_mon_opts = { +static QemuOptsList qemu_mon_opts = { .name = "mon", .implied_opt_name = "chardev", .head = QTAILQ_HEAD_INITIALIZER(qemu_mon_opts.head), @@ -294,7 +288,7 @@ QemuOptsList qemu_mon_opts = { }, }; -QemuOptsList qemu_cpudef_opts = { +static QemuOptsList qemu_cpudef_opts = { .name = "cpudef", .head = QTAILQ_HEAD_INITIALIZER(qemu_cpudef_opts.head), .desc = { @@ -342,7 +336,7 @@ QemuOptsList qemu_cpudef_opts = { }, }; -static QemuOptsList *vm_config_groups[] = { +static QemuOptsList *vm_config_groups[32] = { &qemu_drive_opts, &qemu_chardev_opts, &qemu_device_opts, @@ -374,6 +368,22 @@ QemuOptsList *qemu_find_opts(const char *group) return find_list(vm_config_groups, group); } +void qemu_add_opts(QemuOptsList *list) +{ + int entries, i; + + entries = ARRAY_SIZE(vm_config_groups); + entries--; /* keep list NULL terminated */ + for (i = 0; i < entries; i++) { + if (vm_config_groups[i] == NULL) { + vm_config_groups[i] = list; + return; + } + } + fprintf(stderr, "ran out of space in vm_config_groups"); + abort(); +} + int qemu_set_option(const char *str) { char group[64], id[64], arg[64]; diff --git a/qemu-config.h b/qemu-config.h index dca69d454b..533a049276 100644 --- a/qemu-config.h +++ b/qemu-config.h @@ -1,21 +1,11 @@ #ifndef QEMU_CONFIG_H #define QEMU_CONFIG_H -extern QemuOptsList qemu_drive_opts; -extern QemuOptsList qemu_chardev_opts; -#ifdef CONFIG_LINUX extern QemuOptsList qemu_fsdev_opts; extern QemuOptsList qemu_virtfs_opts; -#endif -extern QemuOptsList qemu_device_opts; -extern QemuOptsList qemu_netdev_opts; -extern QemuOptsList qemu_net_opts; -extern QemuOptsList qemu_rtc_opts; -extern QemuOptsList qemu_global_opts; -extern QemuOptsList qemu_mon_opts; -extern QemuOptsList qemu_cpudef_opts; QemuOptsList *qemu_find_opts(const char *group); +void qemu_add_opts(QemuOptsList *list); int qemu_set_option(const char *str); 
int qemu_global_option(const char *str); void qemu_add_globals(void); diff --git a/qemu-doc.texi b/qemu-doc.texi index e67bf44ff3..55a966fe71 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -706,6 +706,49 @@ Using the @option{-net socket} option, it is possible to make VLANs that span several QEMU instances. See @ref{sec_invocation} to have a basic example. +@section Other Devices + +@subsection Inter-VM Shared Memory device + +With KVM enabled on a Linux host, a shared memory device is available. Guests +map a POSIX shared memory region into the guest as a PCI device that enables +zero-copy communication to the application level of the guests. The basic +syntax is: + +@example +qemu -device ivshmem,size=<size in format accepted by -m>[,shm=<shm name>] +@end example + +If desired, interrupts can be sent between guest VMs accessing the same shared +memory region. Interrupt support requires using a shared memory server and +using a chardev socket to connect to it. The code for the shared memory server +is qemu.git/contrib/ivshmem-server. An example syntax when using the shared +memory server is: + +@example +qemu -device ivshmem,size=<size in format accepted by -m>[,chardev=<id>] + [,msi=on][,ioeventfd=on][,vectors=n][,role=peer|master] +qemu -chardev socket,path=<path>,id=<id> +@end example + +When using the server, the guest will be assigned a VM ID (>=0) that allows guests +using the same server to communicate via interrupts. Guests can read their +VM ID from a device register (see example code). Since receiving the shared +memory region from the server is asynchronous, there is a (small) chance the +guest may boot before the shared memory is attached. To allow an application +to ensure shared memory is attached, the VM ID register will return -1 (an +invalid VM ID) until the memory is attached. Once the shared memory is +attached, the VM ID will return the guest's valid VM ID. 
With these semantics, +the guest application can check to ensure the shared memory is attached to the +guest before proceeding. + +The @option{role} argument can be set to either master or peer and will affect +how the shared memory is migrated. With @option{role=master}, the guest will +copy the shared memory on migration to the destination host. With +@option{role=peer}, the guest will not be able to migrate with the device attached. +With the @option{peer} case, the device should be detached and then reattached +after migration using the PCI hotplug support. + @node direct_linux_boot @section Direct Linux Boot diff --git a/qemu-monitor.hx b/qemu-monitor.hx index 2af3de6c22..5c1da3398e 100644 --- a/qemu-monitor.hx +++ b/qemu-monitor.hx @@ -35,7 +35,29 @@ information on the Server command and response formats. NOTE: This document is temporary and will be replaced soon. -1. Regular Commands +1. Stability Considerations +=========================== + +The current QMP command set (described in this file) may be useful for a +number of use cases, however it's limited and several commands have badly +defined semantics, especially with regard to command completion. + +These problems are going to be solved incrementally in the next QEMU releases +and we're going to establish a deprecation policy for badly defined commands. + +If you're planning to adopt QMP, please observe the following: + + 1. The deprecation policy will take effect and be documented soon, please + check the documentation of each used command as soon as a new release of + QEMU is available + + 2. DO NOT rely on anything which is not explicitly documented + + 3. Errors, in particular, are not documented. Applications should NOT check + for specific error classes or data (it's strongly recommended to only + check for the "error" key) + +2. 
Regular Commands =================== Server's responses in the examples below are always a success response, please @@ -1592,7 +1614,7 @@ HXCOMM This is required for the QMP documentation layout. SQMP -2. Query Commands +3. Query Commands ================= EQMP @@ -1623,13 +1645,25 @@ Show QEMU version. Return a json-object with the following information: -- "qemu": QEMU's version (json-string) +- "qemu": A json-object containing three integer values: + - "major": QEMU's major version (json-int) + - "minor": QEMU's minor version (json-int) + - "micro": QEMU's micro version (json-int) - "package": package's version (json-string) Example: -> { "execute": "query-version" } -<- { "return": { "qemu": "0.11.50", "package": "" } } +<- { + "return":{ + "qemu":{ + "major":0, + "minor":11, + "micro":5 + }, + "package":"" + } + } EQMP diff --git a/qemu-options.hx b/qemu-options.hx index db86feb09b..453f129949 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -482,7 +482,6 @@ possible drivers and properties, use @code{-device ?} and @code{-device @var{driver},?}. ETEXI -#ifdef CONFIG_LINUX DEFHEADING(File system options:) DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev, @@ -515,9 +514,7 @@ Create a file-system-"device" for local-filesystem. @end table ETEXI -#endif -#ifdef CONFIG_LINUX DEFHEADING(Virtual File system pass-through options:) DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs, @@ -554,7 +551,6 @@ Create a Virtual file-system-pass through for local-filesystem. 
@end table ETEXI -#endif DEFHEADING() @@ -1018,6 +1018,7 @@ typedef struct SaveStateEntry { const VMStateDescription *vmsd; void *opaque; CompatEntry *compat; + int no_migrate; } SaveStateEntry; @@ -1081,6 +1082,7 @@ int register_savevm_live(DeviceState *dev, se->load_state = load_state; se->opaque = opaque; se->vmsd = NULL; + se->no_migrate = 0; if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) { char *id = dev->parent_bus->info->get_dev_path(dev); @@ -1147,6 +1149,31 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque) } } +/* mark a device as not to be migrated, that is the device should be + unplugged before migration */ +void register_device_unmigratable(DeviceState *dev, const char *idstr, + void *opaque) +{ + SaveStateEntry *se; + char id[256] = ""; + + if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) { + char *path = dev->parent_bus->info->get_dev_path(dev); + if (path) { + pstrcpy(id, sizeof(id), path); + pstrcat(id, sizeof(id), "/"); + qemu_free(path); + } + } + pstrcat(id, sizeof(id), idstr); + + QTAILQ_FOREACH(se, &savevm_handlers, entry) { + if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) { + se->no_migrate = 1; + } + } +} + int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, const VMStateDescription *vmsd, void *opaque, int alias_id, @@ -1353,13 +1380,19 @@ static int vmstate_load(QEMUFile *f, SaveStateEntry *se, int version_id) return vmstate_load_state(f, se->vmsd, se->opaque, version_id); } -static void vmstate_save(QEMUFile *f, SaveStateEntry *se) +static int vmstate_save(QEMUFile *f, SaveStateEntry *se) { + if (se->no_migrate) { + return -1; + } + if (!se->vmsd) { /* Old style */ se->save_state(f, se->opaque); - return; + return 0; } vmstate_save_state(f,se->vmsd, se->opaque); + + return 0; } #define QEMU_VM_FILE_MAGIC 0x5145564d @@ -1454,6 +1487,7 @@ int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f) int qemu_savevm_state_complete(Monitor *mon, QEMUFile 
*f) { SaveStateEntry *se; + int r; cpu_synchronize_all_states(); @@ -1486,7 +1520,11 @@ int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f) qemu_put_be32(f, se->instance_id); qemu_put_be32(f, se->version_id); - vmstate_save(f, se); + r = vmstate_save(f, se); + if (r < 0) { + monitor_printf(mon, "cannot migrate with device '%s'\n", se->idstr); + return r; + } } qemu_put_byte(f, QEMU_VM_EOF); @@ -1894,12 +1932,27 @@ void do_savevm(Monitor *mon, const QDict *qdict) int load_vmstate(const char *name) { - BlockDriverState *bs, *bs1; + BlockDriverState *bs, *bs_vm_state; QEMUSnapshotInfo sn; QEMUFile *f; int ret; - /* Verify if there is a device that doesn't support snapshots and is writable */ + bs_vm_state = bdrv_snapshots(); + if (!bs_vm_state) { + error_report("No block device supports snapshots"); + return -ENOTSUP; + } + + /* Don't even try to load empty VM states */ + ret = bdrv_snapshot_find(bs_vm_state, &sn, name); + if (ret < 0) { + return ret; + } else if (sn.vm_state_size == 0) { + return -EINVAL; + } + + /* Verify if there is any device that doesn't support snapshots and is + writable and check if the requested snapshot is available too. */ bs = NULL; while ((bs = bdrv_next(bs))) { @@ -1912,63 +1965,45 @@ int load_vmstate(const char *name) bdrv_get_device_name(bs)); return -ENOTSUP; } - } - bs = bdrv_snapshots(); - if (!bs) { - error_report("No block device supports snapshots"); - return -EINVAL; + ret = bdrv_snapshot_find(bs, &sn, name); + if (ret < 0) { + error_report("Device '%s' does not have the requested snapshot '%s'", + bdrv_get_device_name(bs), name); + return ret; + } } /* Flush all IO requests so they don't interfere with the new state. 
*/ qemu_aio_flush(); - bs1 = NULL; - while ((bs1 = bdrv_next(bs1))) { - if (bdrv_can_snapshot(bs1)) { - ret = bdrv_snapshot_goto(bs1, name); + bs = NULL; + while ((bs = bdrv_next(bs))) { + if (bdrv_can_snapshot(bs)) { + ret = bdrv_snapshot_goto(bs, name); if (ret < 0) { - switch(ret) { - case -ENOTSUP: - error_report("%sSnapshots not supported on device '%s'", - bs != bs1 ? "Warning: " : "", - bdrv_get_device_name(bs1)); - break; - case -ENOENT: - error_report("%sCould not find snapshot '%s' on device '%s'", - bs != bs1 ? "Warning: " : "", - name, bdrv_get_device_name(bs1)); - break; - default: - error_report("%sError %d while activating snapshot on '%s'", - bs != bs1 ? "Warning: " : "", - ret, bdrv_get_device_name(bs1)); - break; - } - /* fatal on snapshot block device */ - if (bs == bs1) - return 0; + error_report("Error %d while activating snapshot '%s' on '%s'", + ret, name, bdrv_get_device_name(bs)); + return ret; } } } - /* Don't even try to load empty VM states */ - ret = bdrv_snapshot_find(bs, &sn, name); - if ((ret >= 0) && (sn.vm_state_size == 0)) - return -EINVAL; - /* restore the VM state */ - f = qemu_fopen_bdrv(bs, 0); + f = qemu_fopen_bdrv(bs_vm_state, 0); if (!f) { error_report("Could not open VM state file"); return -EINVAL; } + ret = qemu_loadvm_state(f); + qemu_fclose(f); if (ret < 0) { error_report("Error %d while loading VM state", ret); return ret; } + return 0; } diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c index dcfd81b7ac..04ba8d5152 100644 --- a/target-i386/cpuid.c +++ b/target-i386/cpuid.c @@ -543,7 +543,7 @@ static int check_features_against_host(x86_def_t *guest_def) ~CPUID_EXT3_SVM, ext3_feature_name, 0x80000001}}; cpu_x86_fill_host(&host_def); - for (rv = 0, i = 0; i < sizeof (ft) / sizeof (ft[0]); ++i) + for (rv = 0, i = 0; i < ARRAY_SIZE(ft); ++i) for (mask = 1; mask; mask <<= 1) if (ft[i].check_feat & mask && *ft[i].guest_feat & mask && !(*ft[i].host_feat & mask)) { @@ -965,7 +965,7 @@ void x86_cpudef_setup(void) x86_defs 
= &builtin_x86_defs[i]; } #if !defined(CONFIG_USER_ONLY) - qemu_opts_foreach(&qemu_cpudef_opts, cpudef_register, NULL, 0); + qemu_opts_foreach(qemu_find_opts("cpudef"), cpudef_register, NULL, 0); #endif } diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c index c50e818e72..ec6b3e9384 100644 --- a/target-i386/op_helper.c +++ b/target-i386/op_helper.c @@ -5237,7 +5237,7 @@ void helper_svm_check_intercept_param(uint32_t type, uint64_t param) switch((uint32_t)ECX) { case 0 ... 0x1fff: t0 = (ECX * 2) % 8; - t1 = ECX / 8; + t1 = (ECX * 2) / 8; break; case 0xc0000000 ... 0xc0001fff: t0 = (8192 + ECX - 0xc0000000) * 2; diff --git a/target-mips/cpu.h b/target-mips/cpu.h index b8e6feefc2..19511d7f02 100644 --- a/target-mips/cpu.h +++ b/target-mips/cpu.h @@ -525,6 +525,29 @@ static inline void cpu_clone_regs(CPUState *env, target_ulong newsp) env->active_tc.gpr[2] = 0; } +static inline int cpu_mips_hw_interrupts_pending(CPUState *env) +{ + int32_t pending; + int32_t status; + int r; + + pending = env->CP0_Cause & CP0Ca_IP_mask; + status = env->CP0_Status & CP0Ca_IP_mask; + + if (env->CP0_Config3 & (1 << CP0C3_VEIC)) { + /* A MIPS configured with a vectorizing external interrupt controller + will feed a vector into the Cause pending lines. The core treats + the status lines as a vector level, not as indiviual masks. */ + r = pending > status; + } else { + /* A MIPS configured with compatibility or VInt (Vectored Interrupts) + treats the pending lines as individual interrupt lines, the status + lines are individual masks. 
*/ + r = pending & status; + } + return r; +} + #include "cpu-all.h" /* Memory access type : diff --git a/target-mips/helper.c b/target-mips/helper.c index de2ed7d2c7..bdc1e53669 100644 --- a/target-mips/helper.c +++ b/target-mips/helper.c @@ -478,6 +478,33 @@ void do_interrupt (CPUState *env) cause = 0; if (env->CP0_Cause & (1 << CP0Ca_IV)) offset = 0x200; + + if (env->CP0_Config3 & ((1 << CP0C3_VInt) | (1 << CP0C3_VEIC))) { + /* Vectored Interrupts. */ + unsigned int spacing; + unsigned int vector; + unsigned int pending = (env->CP0_Cause & CP0Ca_IP_mask) >> 8; + + /* Compute the Vector Spacing. */ + spacing = (env->CP0_IntCtl >> CP0IntCtl_VS) & ((1 << 6) - 1); + spacing <<= 5; + + if (env->CP0_Config3 & (1 << CP0C3_VInt)) { + /* For VInt mode, the MIPS computes the vector internally. */ + for (vector = 0; vector < 8; vector++) { + if (pending & 1) { + /* Found it. */ + break; + } + pending >>= 1; + } + } else { + /* For VEIC mode, the external interrupt controller feeds the + vector throught the CP0Cause IP lines. 
*/ + vector = pending; + } + offset = 0x200 + vector * spacing; + } goto set_EPC; case EXCP_LTLBL: cause = 1; diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 53024282da..a1f85997c6 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -65,7 +65,7 @@ enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_R1 #define TCG_TARGET_STACK_ALIGN 16 -#if defined _CALL_DARWIN +#if defined _CALL_DARWIN || defined __APPLE__ #define TCG_TARGET_CALL_STACK_OFFSET 24 #elif defined _CALL_AIX #define TCG_TARGET_CALL_STACK_OFFSET 52 diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 5ba5d053b7..ebbee343fd 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -746,7 +746,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) else tcg_out32 (s, LDX | TAB (data_reg, rbase, r0)); #else if (bswap) { - tcg_out_movi32 (s, TCG_TYPE_I64, 0, 4); + tcg_out_movi32 (s, 0, 4); tcg_out32 (s, LWBRX | RT (data_reg) | RB (r0)); tcg_out32 (s, LWBRX | RT ( r1) | RA (r0)); tcg_out_rld (s, RLDIMI, data_reg, r1, 32, 0); @@ -21,6 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ + +/* Avoid compiler warning because macro is redefined in SDL_syswm.h. 
*/ +#undef WIN32_LEAN_AND_MEAN + #include <SDL.h> #include <SDL_syswm.h> @@ -1184,7 +1184,7 @@ void vnc_client_write(void *opaque) vnc_lock_output(vs); if (vs->output.offset) { vnc_client_write_locked(opaque); - } else { + } else if (vs->csock != -1) { qemu_set_fd_handler2(vs->csock, NULL, vnc_client_read, NULL, vs); } vnc_unlock_output(vs); @@ -1461,12 +1461,12 @@ static int balloon_parse(const char *arg) if (!strncmp(arg, "virtio", 6)) { if (arg[6] == ',') { /* have params -> parse them */ - opts = qemu_opts_parse(&qemu_device_opts, arg+7, 0); + opts = qemu_opts_parse(qemu_find_opts("device"), arg+7, 0); if (!opts) return -1; } else { /* create empty opts */ - opts = qemu_opts_create(&qemu_device_opts, NULL, 0); + opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0); } qemu_opt_set(opts, "driver", "virtio-balloon-pci"); return 0; @@ -1598,7 +1598,7 @@ static void monitor_parse(const char *optarg, const char *mode) } } - opts = qemu_opts_create(&qemu_mon_opts, label, 1); + opts = qemu_opts_create(qemu_find_opts("mon"), label, 1); if (!opts) { fprintf(stderr, "duplicate chardev: %s\n", label); exit(1); @@ -1695,6 +1695,7 @@ static int parallel_parse(const char *devname) static int virtcon_parse(const char *devname) { + QemuOptsList *device = qemu_find_opts("device"); static int index = 0; char label[32]; QemuOpts *bus_opts, *dev_opts; @@ -1706,10 +1707,10 @@ static int virtcon_parse(const char *devname) exit(1); } - bus_opts = qemu_opts_create(&qemu_device_opts, NULL, 0); + bus_opts = qemu_opts_create(device, NULL, 0); qemu_opt_set(bus_opts, "driver", "virtio-serial"); - dev_opts = qemu_opts_create(&qemu_device_opts, NULL, 0); + dev_opts = qemu_opts_create(device, NULL, 0); qemu_opt_set(dev_opts, "driver", "virtconsole"); snprintf(label, sizeof(label), "virtcon%d", index); @@ -1732,7 +1733,7 @@ static int debugcon_parse(const char *devname) if (!qemu_chr_open("debugcon", devname, NULL)) { exit(1); } - opts = qemu_opts_create(&qemu_device_opts, "debugcon", 1); 
+ opts = qemu_opts_create(qemu_find_opts("device"), "debugcon", 1); if (!opts) { fprintf(stderr, "qemu: already have a debugcon device\n"); exit(1); @@ -1810,6 +1811,7 @@ int main(int argc, char **argv, char **envp) DisplayChangeListener *dcl; int cyls, heads, secs, translation; QemuOpts *hda_opts = NULL, *opts; + QemuOptsList *olist; int optind; const char *optarg; const char *loadvm = NULL; @@ -1853,6 +1855,11 @@ int main(int argc, char **argv, char **envp) tb_size = 0; autostart= 1; +#ifdef CONFIG_VIRTFS + qemu_add_opts(&qemu_fsdev_opts); + qemu_add_opts(&qemu_virtfs_opts); +#endif + /* first pass of option parsing */ optind = 1; while (optind < argc) { @@ -2104,12 +2111,12 @@ int main(int argc, char **argv, char **envp) fd_bootchk = 0; break; case QEMU_OPTION_netdev: - if (net_client_parse(&qemu_netdev_opts, optarg) == -1) { + if (net_client_parse(qemu_find_opts("netdev"), optarg) == -1) { exit(1); } break; case QEMU_OPTION_net: - if (net_client_parse(&qemu_net_opts, optarg) == -1) { + if (net_client_parse(qemu_find_opts("net"), optarg) == -1) { exit(1); } break; @@ -2268,21 +2275,25 @@ int main(int argc, char **argv, char **envp) default_monitor = 0; break; case QEMU_OPTION_mon: - opts = qemu_opts_parse(&qemu_mon_opts, optarg, 1); + opts = qemu_opts_parse(qemu_find_opts("mon"), optarg, 1); if (!opts) { exit(1); } default_monitor = 0; break; case QEMU_OPTION_chardev: - opts = qemu_opts_parse(&qemu_chardev_opts, optarg, 1); + opts = qemu_opts_parse(qemu_find_opts("chardev"), optarg, 1); if (!opts) { exit(1); } break; -#ifdef CONFIG_VIRTFS case QEMU_OPTION_fsdev: - opts = qemu_opts_parse(&qemu_fsdev_opts, optarg, 1); + olist = qemu_find_opts("fsdev"); + if (!olist) { + fprintf(stderr, "fsdev is not supported by this qemu build.\n"); + exit(1); + } + opts = qemu_opts_parse(olist, optarg, 1); if (!opts) { fprintf(stderr, "parse error: %s\n", optarg); exit(1); @@ -2293,7 +2304,12 @@ int main(int argc, char **argv, char **envp) char *arg_9p = NULL; int len = 0; - 
opts = qemu_opts_parse(&qemu_virtfs_opts, optarg, 1); + olist = qemu_find_opts("virtfs"); + if (!olist) { + fprintf(stderr, "virtfs is not supported by this qemu build.\n"); + exit(1); + } + opts = qemu_opts_parse(olist, optarg, 1); if (!opts) { fprintf(stderr, "parse error: %s\n", optarg); exit(1); @@ -2316,12 +2332,6 @@ int main(int argc, char **argv, char **envp) len += strlen(qemu_opt_get(opts, "security_model")); arg_fsdev = qemu_malloc((len + 1) * sizeof(*arg_fsdev)); - if (!arg_fsdev) { - fprintf(stderr, "No memory to parse -fsdev for %s\n", - optarg); - exit(1); - } - sprintf(arg_fsdev, "%s,id=%s,path=%s,security_model=%s", qemu_opt_get(opts, "fstype"), qemu_opt_get(opts, "mount_tag"), @@ -2332,22 +2342,16 @@ int main(int argc, char **argv, char **envp) len += 2*strlen(qemu_opt_get(opts, "mount_tag")); arg_9p = qemu_malloc((len + 1) * sizeof(*arg_9p)); - if (!arg_9p) { - fprintf(stderr, "No memory to parse -device for %s\n", - optarg); - exit(1); - } - sprintf(arg_9p, "virtio-9p-pci,fsdev=%s,mount_tag=%s", qemu_opt_get(opts, "mount_tag"), qemu_opt_get(opts, "mount_tag")); - if (!qemu_opts_parse(&qemu_fsdev_opts, arg_fsdev, 1)) { + if (!qemu_opts_parse(qemu_find_opts("fsdev"), arg_fsdev, 1)) { fprintf(stderr, "parse error [fsdev]: %s\n", optarg); exit(1); } - if (!qemu_opts_parse(&qemu_device_opts, arg_9p, 1)) { + if (!qemu_opts_parse(qemu_find_opts("device"), arg_9p, 1)) { fprintf(stderr, "parse error [device]: %s\n", optarg); exit(1); } @@ -2356,7 +2360,6 @@ int main(int argc, char **argv, char **envp) qemu_free(arg_9p); break; } -#endif case QEMU_OPTION_serial: add_device_config(DEV_SERIAL, optarg); default_serial = 0; @@ -2444,7 +2447,7 @@ int main(int argc, char **argv, char **envp) add_device_config(DEV_USB, optarg); break; case QEMU_OPTION_device: - if (!qemu_opts_parse(&qemu_device_opts, optarg, 1)) { + if (!qemu_opts_parse(qemu_find_opts("device"), optarg, 1)) { exit(1); } break; @@ -2540,7 +2543,7 @@ int main(int argc, char **argv, char **envp) 
configure_rtc_date_offset(optarg, 1); break; case QEMU_OPTION_rtc: - opts = qemu_opts_parse(&qemu_rtc_opts, optarg, 0); + opts = qemu_opts_parse(qemu_find_opts("rtc"), optarg, 0); if (!opts) { exit(1); } @@ -2648,8 +2651,8 @@ int main(int argc, char **argv, char **envp) exit(1); } - qemu_opts_foreach(&qemu_device_opts, default_driver_check, NULL, 0); - qemu_opts_foreach(&qemu_global_opts, default_driver_check, NULL, 0); + qemu_opts_foreach(qemu_find_opts("device"), default_driver_check, NULL, 0); + qemu_opts_foreach(qemu_find_opts("global"), default_driver_check, NULL, 0); if (machine->no_serial) { default_serial = 0; @@ -2703,10 +2706,10 @@ int main(int argc, char **argv, char **envp) socket_init(); - if (qemu_opts_foreach(&qemu_chardev_opts, chardev_init_func, NULL, 1) != 0) + if (qemu_opts_foreach(qemu_find_opts("chardev"), chardev_init_func, NULL, 1) != 0) exit(1); #ifdef CONFIG_VIRTFS - if (qemu_opts_foreach(&qemu_fsdev_opts, fsdev_init_func, NULL, 1) != 0) { + if (qemu_opts_foreach(qemu_find_opts("fsdev"), fsdev_init_func, NULL, 1) != 0) { exit(1); } #endif @@ -2790,8 +2793,8 @@ int main(int argc, char **argv, char **envp) /* open the virtual block devices */ if (snapshot) - qemu_opts_foreach(&qemu_drive_opts, drive_enable_snapshot, NULL, 0); - if (qemu_opts_foreach(&qemu_drive_opts, drive_init_func, &machine->use_scsi, 1) != 0) + qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot, NULL, 0); + if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, &machine->use_scsi, 1) != 0) exit(1); register_savevm_live(NULL, "ram", 0, 4, NULL, ram_save_live, NULL, @@ -2839,7 +2842,7 @@ int main(int argc, char **argv, char **envp) } } - if (qemu_opts_foreach(&qemu_mon_opts, mon_init_func, NULL, 1) != 0) { + if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) { exit(1); } @@ -2854,7 +2857,7 @@ int main(int argc, char **argv, char **envp) module_call_init(MODULE_INIT_DEVICE); - if (qemu_opts_foreach(&qemu_device_opts, 
device_help_func, NULL, 0) != 0) + if (qemu_opts_foreach(qemu_find_opts("device"), device_help_func, NULL, 0) != 0) exit(0); if (watchdog) { @@ -2887,7 +2890,7 @@ int main(int argc, char **argv, char **envp) } /* init generic devices */ - if (qemu_opts_foreach(&qemu_device_opts, device_init_func, NULL, 1) != 0) + if (qemu_opts_foreach(qemu_find_opts("device"), device_init_func, NULL, 1) != 0) exit(1); net_check_clients(); |