diff options
131 files changed, 5243 insertions, 2027 deletions
diff --git a/arch_init.c b/arch_init.c index 89c8fa46bb..5fc6fc382c 100644 --- a/arch_init.c +++ b/arch_init.c @@ -52,6 +52,7 @@ #include "exec/ram_addr.h" #include "hw/acpi/acpi.h" #include "qemu/host-utils.h" +#include "qemu/rcu_queue.h" #ifdef DEBUG_ARCH_INIT #define DPRINTF(fmt, ...) \ @@ -487,7 +488,6 @@ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) } -/* Needs iothread lock! */ /* Fix me: there are too many global variables used in migration process. */ static int64_t start_time; static int64_t bytes_xfer_prev; @@ -500,6 +500,7 @@ static void migration_bitmap_sync_init(void) num_dirty_pages_period = 0; } +/* Called with iothread lock held, to protect ram_list.dirty_memory[] */ static void migration_bitmap_sync(void) { RAMBlock *block; @@ -523,9 +524,12 @@ static void migration_bitmap_sync(void) trace_migration_bitmap_sync_start(); address_space_sync_dirty_bitmap(&address_space_memory); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { migration_bitmap_sync_range(block->mr->ram_addr, block->used_length); } + rcu_read_unlock(); + trace_migration_bitmap_sync_end(migration_dirty_pages - num_dirty_pages_init); num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; @@ -648,6 +652,8 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, /* * ram_find_and_save_block: Finds a page to send and sends it to f * + * Called within an RCU critical section. + * * Returns: The number of bytes written. * 0 means no dirty pages */ @@ -661,7 +667,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage) MemoryRegion *mr; if (!block) - block = QTAILQ_FIRST(&ram_list.blocks); + block = QLIST_FIRST_RCU(&ram_list.blocks); while (true) { mr = block->mr; @@ -672,9 +678,9 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage) } if (offset >= block->used_length) { offset = 0; - block = QTAILQ_NEXT(block, next); + block = QLIST_NEXT_RCU(block, next); if (!block) { - block = QTAILQ_FIRST(&ram_list.blocks); + block = QLIST_FIRST_RCU(&ram_list.blocks); complete_round = true; ram_bulk_stage = false; } @@ -688,9 +694,9 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage) } } } + last_seen_block = block; last_offset = offset; - return bytes_sent; } @@ -728,9 +734,10 @@ uint64_t ram_bytes_total(void) RAMBlock *block; uint64_t total = 0; - QTAILQ_FOREACH(block, &ram_list.blocks, next) + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) total += block->used_length; - + rcu_read_unlock(); return total; } @@ -776,6 +783,13 @@ static void reset_ram_globals(void) #define MAX_WAIT 50 /* ms, half buffered_file limit */ + +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has + * long-running RCU critical section. When rcu-reclaims in the code + * start to become numerous it will be necessary to reduce the + * granularity of these critical sections. + */ + static int ram_save_setup(QEMUFile *f, void *opaque) { RAMBlock *block; @@ -816,8 +830,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque) acct_clear(); } + /* iothread lock needed for ram_list.dirty_memory[] */ qemu_mutex_lock_iothread(); qemu_mutex_lock_ramlist(); + rcu_read_lock(); bytes_transferred = 0; reset_ram_globals(); @@ -830,7 +846,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) * gaps due to alignment or unplugs. */ migration_dirty_pages = 0; - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { uint64_t block_pages; block_pages = block->used_length >> TARGET_PAGE_BITS; @@ -839,17 +855,18 @@ static int ram_save_setup(QEMUFile *f, void *opaque) memory_global_dirty_log_start(); migration_bitmap_sync(); + qemu_mutex_unlock_ramlist(); qemu_mutex_unlock_iothread(); qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); qemu_put_be64(f, block->used_length); } - qemu_mutex_unlock_ramlist(); + rcu_read_unlock(); ram_control_before_iterate(f, RAM_CONTROL_SETUP); ram_control_after_iterate(f, RAM_CONTROL_SETUP); @@ -866,12 +883,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) int64_t t0; int total_sent = 0; - qemu_mutex_lock_ramlist(); - + rcu_read_lock(); if (ram_list.version != last_version) { reset_ram_globals(); } + /* Read version before ram_list.blocks */ + smp_rmb(); + ram_control_before_iterate(f, RAM_CONTROL_ROUND); t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); @@ -902,8 +921,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) } i++; } - - qemu_mutex_unlock_ramlist(); + rcu_read_unlock(); /* * Must occur before EOS (or any QEMUFile operation) @@ -928,9 +946,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) return total_sent; } +/* Called with iothread lock */ static int ram_save_complete(QEMUFile *f, void *opaque) { - qemu_mutex_lock_ramlist(); + rcu_read_lock(); + migration_bitmap_sync(); ram_control_before_iterate(f, RAM_CONTROL_FINISH); @@ -952,7 +972,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) ram_control_after_iterate(f, RAM_CONTROL_FINISH); migration_end(); - qemu_mutex_unlock_ramlist(); + rcu_read_unlock(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); return 0; @@ -966,7 +986,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) if (remaining_size < max_size) { qemu_mutex_lock_iothread(); + rcu_read_lock(); migration_bitmap_sync(); + rcu_read_unlock(); qemu_mutex_unlock_iothread(); remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; } @@ -1008,6 +1030,9 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) return 0; } +/* Must be called from within a rcu critical section. + * Returns a pointer from within the RCU-protected ram_list. + */ static inline void *host_from_stream_offset(QEMUFile *f, ram_addr_t offset, int flags) @@ -1029,7 +1054,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, qemu_get_buffer(f, (uint8_t *)id, len); id[len] = 0; - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id)) && block->max_length > offset) { return memory_region_get_ram_ptr(block->mr) + offset; @@ -1062,6 +1087,12 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; } + /* This RCU critical section can be very long running. + * When RCU reclaims in the code start to become numerous, + * it will be necessary to reduce the granularity of this + * critical section. + */ + rcu_read_lock(); while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr, total_ram_bytes; void *host; @@ -1086,7 +1117,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) id[len] = 0; length = qemu_get_be64(f); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id))) { if (length != block->used_length) { Error *local_err = NULL; @@ -1117,7 +1148,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; break; } - ch = qemu_get_byte(f); ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); break; @@ -1128,7 +1158,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; break; } - qemu_get_buffer(f, host, TARGET_PAGE_SIZE); break; case RAM_SAVE_FLAG_XBZRLE: @@ -1138,7 +1167,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; break; } - if (load_xbzrle(f, addr, host) < 0) { error_report("Failed to decompress XBZRLE page at " RAM_ADDR_FMT, addr); @@ -1163,6 +1191,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } } + rcu_read_unlock(); DPRINTF("Completed load of VM with exit code %d seq iteration " "%" PRIu64 "\n", ret, seq_iter); return ret; @@ -508,9 +508,8 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) Error *local_err = NULL; int ret; - drv = bdrv_find_protocol(filename, true); + drv = bdrv_find_protocol(filename, true, errp); if (drv == NULL) { - error_setg(errp, "Could not find protocol for file '%s'", filename); return -ENOENT; } @@ -628,7 +627,8 @@ static BlockDriver *find_hdev_driver(const char *filename) } BlockDriver *bdrv_find_protocol(const char *filename, - bool allow_protocol_prefix) + bool allow_protocol_prefix, + Error **errp) { BlockDriver *drv1; char protocol[128]; @@ -666,6 +666,8 @@ BlockDriver *bdrv_find_protocol(const char *filename, return drv1; } } + + error_setg(errp, "Unknown protocol '%s'", protocol); return NULL; } @@ -970,7 +972,6 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, bs->zero_beyond_eof = true; open_flags = bdrv_open_flags(bs, flags); bs->read_only = !(open_flags & BDRV_O_RDWR); - bs->growable = !!(flags & BDRV_O_PROTOCOL); if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { error_setg(errp, @@ -1136,9 +1137,8 @@ static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, } else { if (!drvname && protocol) { if (filename) { - drv = bdrv_find_protocol(filename, parse_filename); + drv = bdrv_find_protocol(filename, parse_filename, errp); if (!drv) { - error_setg(errp, "Unknown protocol"); return -EINVAL; } @@ -1885,7 +1885,6 @@ void bdrv_close(BlockDriverState *bs) bs->encrypted = 0; bs->valid_key = 0; bs->sg = 0; - bs->growable = 0; bs->zero_beyond_eof = false; QDECREF(bs->options); bs->options = NULL; @@ -2645,25 +2644,17 @@ exit: static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, size_t size) { - int64_t len; - if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) { return -EIO; } - if (!bdrv_is_inserted(bs)) + if (!bdrv_is_inserted(bs)) { return -ENOMEDIUM; + } - if (bs->growable) - return 0; - - len = bdrv_getlength(bs); - - if (offset < 0) - return -EIO; - - if ((offset > len) || (len - offset < size)) + if (offset < 0) { return -EIO; + } return 0; } @@ -3042,10 +3033,10 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } /* Forward the request to the BlockDriver */ - if (!(bs->zero_beyond_eof && bs->growable)) { + if (!bs->zero_beyond_eof) { ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); } else { - /* Read zeros after EOF of growable BDSes */ + /* Read zeros after EOF */ int64_t total_sectors, max_nb_sectors; total_sectors = bdrv_nb_sectors(bs); @@ -3107,8 +3098,10 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, if (!drv) { return -ENOMEDIUM; } - if (bdrv_check_byte_request(bs, offset, bytes)) { - return -EIO; + + ret = bdrv_check_byte_request(bs, offset, bytes); + if (ret < 0) { + return ret; } if (bs->copy_on_read) { @@ -3322,7 +3315,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, block_acct_highest_sector(&bs->stats, sector_num, nb_sectors); - if (bs->growable && ret >= 0) { + if (ret >= 0) { bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); } @@ -3351,8 +3344,10 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, if (bs->read_only) { return -EACCES; } - if (bdrv_check_byte_request(bs, offset, bytes)) { - return -EIO; + + ret = bdrv_check_byte_request(bs, offset, bytes); + if (ret < 0) { + return ret; } /* throttling disk I/O */ @@ -4206,12 +4201,18 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { BlockDriver *drv = bs->drv; - if (!drv) + int ret; + + if (!drv) { return -ENOMEDIUM; - if (!drv->bdrv_write_compressed) + } + if (!drv->bdrv_write_compressed) { return -ENOTSUP; - if (bdrv_check_request(bs, sector_num, nb_sectors)) - return -EIO; + } + ret = bdrv_check_request(bs, sector_num, nb_sectors); + if (ret < 0) { + return ret; + } assert(QLIST_EMPTY(&bs->dirty_bitmaps)); @@ -5126,12 +5127,15 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque) int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { - int max_discard; + int max_discard, ret; if (!bs->drv) { return -ENOMEDIUM; - } else if (bdrv_check_request(bs, sector_num, nb_sectors)) { - return -EIO; + } + + ret = bdrv_check_request(bs, sector_num, nb_sectors); + if (ret < 0) { + return ret; } else if (bs->read_only) { return -EROFS; } @@ -5623,9 +5627,8 @@ void bdrv_img_create(const char *filename, const char *fmt, return; } - proto_drv = bdrv_find_protocol(filename, true); + proto_drv = bdrv_find_protocol(filename, true, errp); if (!proto_drv) { - error_setg(errp, "Unknown protocol '%s'", filename); return; } diff --git a/block/block-backend.c b/block/block-backend.c index 1988515dfd..bfb041823e 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -31,6 +31,16 @@ struct BlockBackend { void *dev_opaque; }; +typedef struct BlockBackendAIOCB { + BlockAIOCB common; + QEMUBH *bh; + int ret; +} BlockBackendAIOCB; + +static const AIOCBInfo block_backend_aiocb_info = { + .aiocb_size = sizeof(BlockBackendAIOCB), +}; + static void drive_info_del(DriveInfo *dinfo); /* All the BlockBackends (except for hidden ones) */ @@ -91,6 +101,40 @@ BlockBackend *blk_new_with_bs(const char *name, Error **errp) return blk; } +/* + * Calls blk_new_with_bs() and then calls bdrv_open() on the BlockDriverState. + * + * Just as with bdrv_open(), after having called this function the reference to + * @options belongs to the block layer (even on failure). + * + * TODO: Remove @filename and @flags; it should be possible to specify a whole + * BDS tree just by specifying the @options QDict (or @reference, + * alternatively). At the time of adding this function, this is not possible, + * though, so callers of this function have to be able to specify @filename and + * @flags. + */ +BlockBackend *blk_new_open(const char *name, const char *filename, + const char *reference, QDict *options, int flags, + Error **errp) +{ + BlockBackend *blk; + int ret; + + blk = blk_new_with_bs(name, errp); + if (!blk) { + QDECREF(options); + return NULL; + } + + ret = bdrv_open(&blk->bs, filename, reference, options, flags, NULL, errp); + if (ret < 0) { + blk_unref(blk); + return NULL; + } + + return blk; +} + static void blk_delete(BlockBackend *blk) { assert(!blk->refcnt); @@ -394,39 +438,137 @@ void blk_iostatus_enable(BlockBackend *blk) bdrv_iostatus_enable(blk->bs); } +static int blk_check_byte_request(BlockBackend *blk, int64_t offset, + size_t size) +{ + int64_t len; + + if (size > INT_MAX) { + return -EIO; + } + + if (!blk_is_inserted(blk)) { + return -ENOMEDIUM; + } + + len = blk_getlength(blk); + if (len < 0) { + return len; + } + + if (offset < 0) { + return -EIO; + } + + if (offset > len || len - offset < size) { + return -EIO; + } + + return 0; +} + +static int blk_check_request(BlockBackend *blk, int64_t sector_num, + int nb_sectors) +{ + if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) { + return -EIO; + } + + if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) { + return -EIO; + } + + return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE); +} + int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf, int nb_sectors) { + int ret = blk_check_request(blk, sector_num, nb_sectors); + if (ret < 0) { + return ret; + } + return bdrv_read(blk->bs, sector_num, buf, nb_sectors); } int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf, int nb_sectors) { + int ret = blk_check_request(blk, sector_num, nb_sectors); + if (ret < 0) { + return ret; + } + return bdrv_read_unthrottled(blk->bs, sector_num, buf, nb_sectors); } int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf, int nb_sectors) { + int ret = blk_check_request(blk, sector_num, nb_sectors); + if (ret < 0) { + return ret; + } + return bdrv_write(blk->bs, sector_num, buf, nb_sectors); } +static void error_callback_bh(void *opaque) +{ + struct BlockBackendAIOCB *acb = opaque; + qemu_bh_delete(acb->bh); + acb->common.cb(acb->common.opaque, acb->ret); + qemu_aio_unref(acb); +} + +static BlockAIOCB *abort_aio_request(BlockBackend *blk, BlockCompletionFunc *cb, + void *opaque, int ret) +{ + struct BlockBackendAIOCB *acb; + QEMUBH *bh; + + acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque); + acb->ret = ret; + + bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb); + acb->bh = bh; + qemu_bh_schedule(bh); + + return &acb->common; +} + BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + int ret = blk_check_request(blk, sector_num, nb_sectors); + if (ret < 0) { + return abort_aio_request(blk, cb, opaque, ret); + } + return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags, cb, opaque); } int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count) { + int ret = blk_check_byte_request(blk, offset, count); + if (ret < 0) { + return ret; + } + return bdrv_pread(blk->bs, offset, buf, count); } int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count) { + int ret = blk_check_byte_request(blk, offset, count); + if (ret < 0) { + return ret; + } + return bdrv_pwrite(blk->bs, offset, buf, count); } @@ -440,10 +582,20 @@ void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr) bdrv_get_geometry(blk->bs, nb_sectors_ptr); } +int64_t blk_nb_sectors(BlockBackend *blk) +{ + return bdrv_nb_sectors(blk->bs); +} + BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque) { + int ret = blk_check_request(blk, sector_num, nb_sectors); + if (ret < 0) { + return abort_aio_request(blk, cb, opaque, ret); + } + return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque); } @@ -451,6 +603,11 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque) { + int ret = blk_check_request(blk, sector_num, nb_sectors); + if (ret < 0) { + return abort_aio_request(blk, cb, opaque, ret); + } + return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque); } @@ -464,6 +621,11 @@ BlockAIOCB *blk_aio_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors, BlockCompletionFunc *cb, void *opaque) { + int ret = blk_check_request(blk, sector_num, nb_sectors); + if (ret < 0) { + return abort_aio_request(blk, cb, opaque, ret); + } + return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque); } @@ -479,6 +641,15 @@ void blk_aio_cancel_async(BlockAIOCB *acb) int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs) { + int i, ret; + + for (i = 0; i < num_reqs; i++) { + ret = blk_check_request(blk, reqs[i].sector, reqs[i].nb_sectors); + if (ret < 0) { + return ret; + } + } + return bdrv_aio_multiwrite(blk->bs, reqs, num_reqs); } @@ -495,6 +666,11 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors) { + int ret = blk_check_request(blk, sector_num, nb_sectors); + if (ret < 0) { + return ret; + } + return bdrv_co_discard(blk->bs, sector_num, nb_sectors); } @@ -668,3 +844,51 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, { return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque); } + +int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags) +{ + int ret = blk_check_request(blk, sector_num, nb_sectors); + if (ret < 0) { + return ret; + } + + return bdrv_co_write_zeroes(blk->bs, sector_num, nb_sectors, flags); +} + +int blk_write_compressed(BlockBackend *blk, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + int ret = blk_check_request(blk, sector_num, nb_sectors); + if (ret < 0) { + return ret; + } + + return bdrv_write_compressed(blk->bs, sector_num, buf, nb_sectors); +} + +int blk_truncate(BlockBackend *blk, int64_t offset) +{ + return bdrv_truncate(blk->bs, offset); +} + +int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors) +{ + int ret = blk_check_request(blk, sector_num, nb_sectors); + if (ret < 0) { + return ret; + } + + return bdrv_discard(blk->bs, sector_num, nb_sectors); +} + +int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size) +{ + return bdrv_save_vmstate(blk->bs, buf, pos, size); +} + +int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) +{ + return bdrv_load_vmstate(blk->bs, buf, pos, size); +} diff --git a/block/nbd-client.c b/block/nbd-client.c index 28bfb62bed..259f5a3cb6 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -43,20 +43,23 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s) } } -static void nbd_teardown_connection(NbdClientSession *client) +static void nbd_teardown_connection(BlockDriverState *bs) { + NbdClientSession *client = nbd_get_client_session(bs); + /* finish any pending coroutines */ shutdown(client->sock, 2); nbd_recv_coroutines_enter_all(client); - nbd_client_session_detach_aio_context(client); + nbd_client_detach_aio_context(bs); closesocket(client->sock); client->sock = -1; } static void nbd_reply_ready(void *opaque) { - NbdClientSession *s = opaque; + BlockDriverState *bs = opaque; + NbdClientSession *s = nbd_get_client_session(bs); uint64_t i; int ret; @@ -89,28 +92,40 @@ static void nbd_reply_ready(void *opaque) } fail: - nbd_teardown_connection(s); + nbd_teardown_connection(bs); } static void nbd_restart_write(void *opaque) { - NbdClientSession *s = opaque; + BlockDriverState *bs = opaque; - qemu_coroutine_enter(s->send_coroutine, NULL); + qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine, NULL); } -static int nbd_co_send_request(NbdClientSession *s, - struct nbd_request *request, - QEMUIOVector *qiov, int offset) +static int nbd_co_send_request(BlockDriverState *bs, + struct nbd_request *request, + QEMUIOVector *qiov, int offset) { + NbdClientSession *s = nbd_get_client_session(bs); AioContext *aio_context; - int rc, ret; + int rc, ret, i; qemu_co_mutex_lock(&s->send_mutex); + + for (i = 0; i < MAX_NBD_REQUESTS; i++) { + if (s->recv_coroutine[i] == NULL) { + s->recv_coroutine[i] = qemu_coroutine_self(); + break; + } + } + + assert(i < MAX_NBD_REQUESTS); + request->handle = INDEX_TO_HANDLE(s, i); s->send_coroutine = qemu_coroutine_self(); - aio_context = bdrv_get_aio_context(s->bs); + aio_context = bdrv_get_aio_context(bs); + aio_set_fd_handler(aio_context, s->sock, - nbd_reply_ready, nbd_restart_write, s); + nbd_reply_ready, nbd_restart_write, bs); if (qiov) { if (!s->is_unix) { socket_set_cork(s->sock, 1); @@ -129,7 +144,7 @@ static int nbd_co_send_request(NbdClientSession *s, } else { rc = nbd_send_request(s->sock, request); } - aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, s); + aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, bs); s->send_coroutine = NULL; qemu_co_mutex_unlock(&s->send_mutex); return rc; @@ -164,8 +179,6 @@ static void nbd_co_receive_reply(NbdClientSession *s, static void nbd_coroutine_start(NbdClientSession *s, struct nbd_request *request) { - int i; - /* Poor man semaphore. The free_sema is locked when no other request * can be accepted, and unlocked after receiving one reply. */ if (s->in_flight >= MAX_NBD_REQUESTS - 1) { @@ -174,15 +187,7 @@ static void nbd_coroutine_start(NbdClientSession *s, } s->in_flight++; - for (i = 0; i < MAX_NBD_REQUESTS; i++) { - if (s->recv_coroutine[i] == NULL) { - s->recv_coroutine[i] = qemu_coroutine_self(); - break; - } - } - - assert(i < MAX_NBD_REQUESTS); - request->handle = INDEX_TO_HANDLE(s, i); + /* s->recv_coroutine[i] is set as soon as we get the send_lock. */ } static void nbd_coroutine_end(NbdClientSession *s, @@ -195,10 +200,11 @@ static void nbd_coroutine_end(NbdClientSession *s, } } -static int nbd_co_readv_1(NbdClientSession *client, int64_t sector_num, +static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int offset) { + NbdClientSession *client = nbd_get_client_session(bs); struct nbd_request request = { .type = NBD_CMD_READ }; struct nbd_reply reply; ssize_t ret; @@ -207,7 +213,7 @@ static int nbd_co_readv_1(NbdClientSession *client, int64_t sector_num, request.len = nb_sectors * 512; nbd_coroutine_start(client, &request); - ret = nbd_co_send_request(client, &request, NULL, 0); + ret = nbd_co_send_request(bs, &request, NULL, 0); if (ret < 0) { reply.error = -ret; } else { @@ -218,15 +224,16 @@ static int nbd_co_readv_1(NbdClientSession *client, int64_t sector_num, } -static int nbd_co_writev_1(NbdClientSession *client, int64_t sector_num, +static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int offset) { + NbdClientSession *client = nbd_get_client_session(bs); struct nbd_request request = { .type = NBD_CMD_WRITE }; struct nbd_reply reply; ssize_t ret; - if (!bdrv_enable_write_cache(client->bs) && + if (!bdrv_enable_write_cache(bs) && (client->nbdflags & NBD_FLAG_SEND_FUA)) { request.type |= NBD_CMD_FLAG_FUA; } @@ -235,7 +242,7 @@ static int nbd_co_writev_1(NbdClientSession *client, int64_t sector_num, request.len = nb_sectors * 512; nbd_coroutine_start(client, &request); - ret = nbd_co_send_request(client, &request, qiov, offset); + ret = nbd_co_send_request(bs, &request, qiov, offset); if (ret < 0) { reply.error = -ret; } else { @@ -249,14 +256,13 @@ static int nbd_co_writev_1(NbdClientSession *client, int64_t sector_num, * remain aligned to 4K. */ #define NBD_MAX_SECTORS 2040 -int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) +int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) { int offset = 0; int ret; while (nb_sectors > NBD_MAX_SECTORS) { - ret = nbd_co_readv_1(client, sector_num, - NBD_MAX_SECTORS, qiov, offset); + ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); if (ret < 0) { return ret; } @@ -264,17 +270,16 @@ int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num, sector_num += NBD_MAX_SECTORS; nb_sectors -= NBD_MAX_SECTORS; } - return nbd_co_readv_1(client, sector_num, nb_sectors, qiov, offset); + return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset); } -int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) +int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) { int offset = 0; int ret; while (nb_sectors > NBD_MAX_SECTORS) { - ret = nbd_co_writev_1(client, sector_num, - NBD_MAX_SECTORS, qiov, offset); + ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); if (ret < 0) { return ret; } @@ -282,11 +287,12 @@ int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num, sector_num += NBD_MAX_SECTORS; nb_sectors -= NBD_MAX_SECTORS; } - return nbd_co_writev_1(client, sector_num, nb_sectors, qiov, offset); + return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset); } -int nbd_client_session_co_flush(NbdClientSession *client) +int nbd_client_co_flush(BlockDriverState *bs) { + NbdClientSession *client = nbd_get_client_session(bs); struct nbd_request request = { .type = NBD_CMD_FLUSH }; struct nbd_reply reply; ssize_t ret; @@ -303,7 +309,7 @@ int nbd_client_session_co_flush(NbdClientSession *client) request.len = 0; nbd_coroutine_start(client, &request); - ret = nbd_co_send_request(client, &request, NULL, 0); + ret = nbd_co_send_request(bs, &request, NULL, 0); if (ret < 0) { reply.error = -ret; } else { @@ -313,9 +319,10 @@ int nbd_client_session_co_flush(NbdClientSession *client) return -reply.error; } -int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num, - int nb_sectors) +int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num, + int nb_sectors) { + NbdClientSession *client = nbd_get_client_session(bs); struct nbd_request request = { .type = NBD_CMD_TRIM }; struct nbd_reply reply; ssize_t ret; @@ -327,7 +334,7 @@ int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num, request.len = nb_sectors * 512; nbd_coroutine_start(client, &request); - ret = nbd_co_send_request(client, &request, NULL, 0); + ret = nbd_co_send_request(bs, &request, NULL, 0); if (ret < 0) { reply.error = -ret; } else { @@ -338,43 +345,41 @@ int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num, } -void nbd_client_session_detach_aio_context(NbdClientSession *client) +void nbd_client_detach_aio_context(BlockDriverState *bs) { - aio_set_fd_handler(bdrv_get_aio_context(client->bs), client->sock, - NULL, NULL, NULL); + aio_set_fd_handler(bdrv_get_aio_context(bs), + nbd_get_client_session(bs)->sock, NULL, NULL, NULL); } -void nbd_client_session_attach_aio_context(NbdClientSession *client, - AioContext *new_context) +void nbd_client_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) { - aio_set_fd_handler(new_context, client->sock, - nbd_reply_ready, NULL, client); + aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock, + nbd_reply_ready, NULL, bs); } -void nbd_client_session_close(NbdClientSession *client) +void nbd_client_close(BlockDriverState *bs) { + NbdClientSession *client = nbd_get_client_session(bs); struct nbd_request request = { .type = NBD_CMD_DISC, .from = 0, .len = 0 }; - if (!client->bs) { - return; - } if (client->sock == -1) { return; } nbd_send_request(client->sock, &request); - nbd_teardown_connection(client); - client->bs = NULL; + nbd_teardown_connection(bs); } -int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs, - int sock, const char *export, Error **errp) +int nbd_client_init(BlockDriverState *bs, int sock, const char *export, + Error **errp) { + NbdClientSession *client = nbd_get_client_session(bs); int ret; /* NBD handshake */ @@ -391,13 +396,12 @@ int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs, qemu_co_mutex_init(&client->send_mutex); qemu_co_mutex_init(&client->free_sema); - client->bs = bs; client->sock = sock; /* Now that we're connected, set the socket to be non-blocking and * kick the reply mechanism. */ qemu_set_nonblock(sock); - nbd_client_session_attach_aio_context(client, bdrv_get_aio_context(bs)); + nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs)); logout("Established connection with NBD server\n"); return 0; diff --git a/block/nbd-client.h b/block/nbd-client.h index cfeecc2775..fa4ff42d22 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -31,24 +31,24 @@ typedef struct NbdClientSession { struct nbd_reply reply; bool is_unix; - - BlockDriverState *bs; } NbdClientSession; -int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs, - int sock, const char *export_name, Error **errp); -void nbd_client_session_close(NbdClientSession *client); - -int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num, - int nb_sectors); -int nbd_client_session_co_flush(NbdClientSession *client); -int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov); -int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov); - -void nbd_client_session_detach_aio_context(NbdClientSession *client); -void nbd_client_session_attach_aio_context(NbdClientSession *client, - AioContext *new_context); +NbdClientSession *nbd_get_client_session(BlockDriverState *bs); + +int nbd_client_init(BlockDriverState *bs, int sock, const char *export_name, + Error **errp); +void nbd_client_close(BlockDriverState *bs); + +int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num, + int nb_sectors); +int nbd_client_co_flush(BlockDriverState *bs); +int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov); +int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov); + +void nbd_client_detach_aio_context(BlockDriverState *bs); +void nbd_client_attach_aio_context(BlockDriverState *bs, + AioContext *new_context); #endif /* NBD_CLIENT_H */ diff --git a/block/nbd.c b/block/nbd.c index b05d1d0407..2f3b9adf72 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -224,6 +224,12 @@ static void nbd_config(BDRVNBDState *s, QDict *options, char **export, } } +NbdClientSession *nbd_get_client_session(BlockDriverState *bs) +{ + BDRVNBDState *s = bs->opaque; + return &s->client; +} + static int nbd_establish_connection(BlockDriverState *bs, Error **errp) { BDRVNBDState *s = bs->opaque; @@ -271,7 +277,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, } /* NBD handshake */ - result = nbd_client_session_init(&s->client, bs, sock, export, errp); + result = nbd_client_init(bs, sock, export, errp); g_free(export); return result; } @@ -279,26 +285,18 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - BDRVNBDState *s = bs->opaque; - - return nbd_client_session_co_readv(&s->client, sector_num, - nb_sectors, qiov); + return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov); } static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - BDRVNBDState *s = bs->opaque; - - return nbd_client_session_co_writev(&s->client, sector_num, - nb_sectors, qiov); + return nbd_client_co_writev(bs, sector_num, nb_sectors, qiov); } static int nbd_co_flush(BlockDriverState *bs) { - BDRVNBDState *s = bs->opaque; - - return nbd_client_session_co_flush(&s->client); + return nbd_client_co_flush(bs); } static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) @@ -310,10 +308,7 @@ static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { - BDRVNBDState *s = bs->opaque; - - return nbd_client_session_co_discard(&s->client, sector_num, - nb_sectors); + return nbd_client_co_discard(bs, sector_num, nb_sectors); } static void nbd_close(BlockDriverState *bs) @@ -321,7 +316,7 @@ static void nbd_close(BlockDriverState *bs) BDRVNBDState *s = bs->opaque; qemu_opts_del(s->socket_opts); - nbd_client_session_close(&s->client); + nbd_client_close(bs); } static int64_t nbd_getlength(BlockDriverState *bs) @@ -333,17 +328,13 @@ static int64_t nbd_getlength(BlockDriverState *bs) static void nbd_detach_aio_context(BlockDriverState *bs) { - BDRVNBDState *s = bs->opaque; - - nbd_client_session_detach_aio_context(&s->client); + nbd_client_detach_aio_context(bs); } static void nbd_attach_aio_context(BlockDriverState *bs, AioContext *new_context) { - BDRVNBDState *s = bs->opaque; - - nbd_client_session_attach_aio_context(&s->client, new_context); + nbd_client_attach_aio_context(bs, new_context); } static void nbd_refresh_filename(BlockDriverState *bs) diff --git a/block/qcow2.c b/block/qcow2.c index 7e614d76a4..2ed8d95b1f 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2521,15 +2521,12 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, { BDRVQcowState *s = bs->opaque; int64_t total_sectors = bs->total_sectors; - int growable = bs->growable; bool zero_beyond_eof = bs->zero_beyond_eof; int ret; BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); - bs->growable = 1; bs->zero_beyond_eof = false; ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov); - bs->growable = growable; bs->zero_beyond_eof = zero_beyond_eof; /* bdrv_co_do_writev will have increased the total_sectors value to include @@ -2544,15 +2541,12 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size) { BDRVQcowState *s = bs->opaque; - int growable = bs->growable; bool zero_beyond_eof = bs->zero_beyond_eof; int ret; BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); - bs->growable = 1; bs->zero_beyond_eof = false; ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size); - bs->growable = growable; bs->zero_beyond_eof = zero_beyond_eof; return ret; diff --git a/block/raw-posix.c b/block/raw-posix.c index e474c17974..b5f077a8f1 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1047,7 +1047,7 @@ static int aio_worker(void *arg) switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { case QEMU_AIO_READ: ret = handle_aiocb_rw(aiocb); - if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) { + if (ret >= 0 && ret < aiocb->aio_nbytes) { iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret, 0, aiocb->aio_nbytes - ret); diff --git a/block/raw-win32.c b/block/raw-win32.c index 06243d76df..dae5d2fee9 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -101,7 +101,7 @@ static int aio_worker(void *arg) switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { case QEMU_AIO_READ: count = handle_aiocb_rw(aiocb); - if (count < aiocb->aio_nbytes && aiocb->bs->growable) { + if (count < aiocb->aio_nbytes) { /* A short read means that we have reached EOF. Pad the buffer * with zeros for bytes after EOF. */ iov_memset(aiocb->aio_iov, aiocb->aio_niov, count, diff --git a/block/sheepdog.c b/block/sheepdog.c index be3176fcb4..b320871e9c 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1730,7 +1730,7 @@ static int sd_create(const char *filename, QemuOpts *opts, BlockDriver *drv; /* Currently, only Sheepdog backing image is supported. */ - drv = bdrv_find_protocol(backing_file, true); + drv = bdrv_find_protocol(backing_file, true, NULL); if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) { error_setg(errp, "backing_file must be a sheepdog image"); ret = -EINVAL; @@ -2117,7 +2117,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE; BDRVSheepdogState *s = bs->opaque; - if (bs->growable && offset > s->inode.vdi_size) { + if (offset > s->inode.vdi_size) { ret = sd_truncate(bs, offset); if (ret < 0) { return ret; diff --git a/block/vmdk.c b/block/vmdk.c index 7d079adc4a..8410a158a2 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -843,8 +843,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, } extent_path = g_malloc0(PATH_MAX); - path_combine(extent_path, sizeof(extent_path), - desc_file_path, fname); + path_combine(extent_path, PATH_MAX, desc_file_path, fname); extent_file = NULL; ret = bdrv_open(&extent_file, extent_path, NULL, NULL, bs->open_flags | BDRV_O_PROTOCOL, NULL, errp); diff --git a/blockdev.c b/blockdev.c index 0f1a253c85..18e420779f 100644 --- a/blockdev.c +++ b/blockdev.c @@ -354,13 +354,11 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, ThrottleConfig cfg; int snapshot = 0; bool copy_on_read; - int ret; Error *error = NULL; QemuOpts *opts; const char *id; bool has_driver_specific_opts; BlockdevDetectZeroesOptions detect_zeroes; - BlockDriver *drv = NULL; /* Check common options by copying from bs_opts to opts, all other options * stay in bs_opts for processing by bdrv_open(). */ @@ -426,11 +424,11 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, goto early_err; } - drv = bdrv_find_format(buf); - if (!drv) { - error_setg(errp, "'%s' invalid format", buf); + if (qdict_haskey(bs_opts, "driver")) { + error_setg(errp, "Cannot specify both 'driver' and 'format'"); goto early_err; } + qdict_put(bs_opts, "driver", qstring_from_str(buf)); } /* disk I/O throttling */ @@ -505,70 +503,64 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, } /* init */ - blk = blk_new_with_bs(qemu_opts_id(opts), errp); - if (!blk) { - goto early_err; - } - bs = blk_bs(blk); - bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0; - bs->read_only = ro; - bs->detect_zeroes = detect_zeroes; - - bdrv_set_on_error(bs, on_read_error, on_write_error); + if ((!file || !*file) && !has_driver_specific_opts) { + blk = blk_new_with_bs(qemu_opts_id(opts), errp); + if (!blk) { + goto early_err; + } - /* disk I/O throttling */ - if (throttle_enabled(&cfg)) { - bdrv_io_limits_enable(bs); - bdrv_set_io_limits(bs, &cfg); - } + bs = blk_bs(blk); + bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0; + bs->read_only = ro; - if (!file || !*file) { - if (has_driver_specific_opts) { + QDECREF(bs_opts); + } else { + if (file && !*file) { file = NULL; - } else { - QDECREF(bs_opts); - qemu_opts_del(opts); - return blk; } - } - if (snapshot) { - /* always use cache=unsafe with snapshot */ - bdrv_flags &= ~BDRV_O_CACHE_MASK; - bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH); - } - if (copy_on_read) { - bdrv_flags |= BDRV_O_COPY_ON_READ; - } + if (snapshot) { + /* always use cache=unsafe with snapshot */ + bdrv_flags &= ~BDRV_O_CACHE_MASK; + bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH); + } + + if (copy_on_read) { + bdrv_flags |= BDRV_O_COPY_ON_READ; + } + + if (runstate_check(RUN_STATE_INMIGRATE)) { + bdrv_flags |= BDRV_O_INCOMING; + } - if (runstate_check(RUN_STATE_INMIGRATE)) { - bdrv_flags |= BDRV_O_INCOMING; + bdrv_flags |= ro ? 0 : BDRV_O_RDWR; + + blk = blk_new_open(qemu_opts_id(opts), file, NULL, bs_opts, bdrv_flags, + errp); + if (!blk) { + goto err_no_bs_opts; + } + bs = blk_bs(blk); } - bdrv_flags |= ro ? 0 : BDRV_O_RDWR; + bs->detect_zeroes = detect_zeroes; - QINCREF(bs_opts); - ret = bdrv_open(&bs, file, NULL, bs_opts, bdrv_flags, drv, &error); - assert(bs == blk_bs(blk)); + bdrv_set_on_error(bs, on_read_error, on_write_error); - if (ret < 0) { - error_setg(errp, "could not open disk image %s: %s", - file ?: blk_name(blk), error_get_pretty(error)); - error_free(error); - goto err; + /* disk I/O throttling */ + if (throttle_enabled(&cfg)) { + bdrv_io_limits_enable(bs); + bdrv_set_io_limits(bs, &cfg); } if (bdrv_key_required(bs)) { autostart = 0; } - QDECREF(bs_opts); +err_no_bs_opts: qemu_opts_del(opts); - return blk; -err: - blk_unref(blk); early_err: qemu_opts_del(opts); err_no_opts: diff --git a/bootdevice.c b/bootdevice.c index 5914417027..c3a010c094 100644 --- a/bootdevice.c +++ b/bootdevice.c @@ -210,7 +210,9 @@ char *get_boot_devices_list(size_t *size, bool ignore_suffixes) char *list = NULL; QTAILQ_FOREACH(i, &fw_boot_order, link) { - char *devpath = NULL, *bootpath; + char *devpath = NULL, *suffix = NULL; + char *bootpath; + char *d; size_t len; if (i->dev) { @@ -218,21 +220,22 @@ char *get_boot_devices_list(size_t *size, bool ignore_suffixes) assert(devpath); } - if (i->suffix && !ignore_suffixes && devpath) { - size_t bootpathlen = strlen(devpath) + strlen(i->suffix) + 1; - - bootpath = g_malloc(bootpathlen); - snprintf(bootpath, bootpathlen, "%s%s", devpath, i->suffix); - g_free(devpath); - } else if (devpath) { - bootpath = devpath; - } else if (!ignore_suffixes) { - assert(i->suffix); - bootpath = g_strdup(i->suffix); - } else { - bootpath = g_strdup(""); + if (!ignore_suffixes) { + d = qdev_get_own_fw_dev_path_from_handler(i->dev->parent_bus, i->dev); + if (d) { + assert(!i->suffix); + suffix = d; + } else { + suffix = g_strdup(i->suffix); + } } + bootpath = g_strdup_printf("%s%s", + devpath ? devpath : "", + suffix ? suffix : ""); + g_free(devpath); + g_free(suffix); + if (total) { list[total-1] = '\n'; } diff --git a/cpu-exec.c b/cpu-exec.c index 67381176da..2ffeb6e40d 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -24,6 +24,9 @@ #include "qemu/atomic.h" #include "sysemu/qtest.h" #include "qemu/timer.h" +#include "exec/address-spaces.h" +#include "exec/memory-internal.h" +#include "qemu/rcu.h" /* -icount align implementation. */ @@ -141,6 +144,33 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc) cpu->exception_index = -1; siglongjmp(cpu->jmp_env, 1); } + +void cpu_reload_memory_map(CPUState *cpu) +{ + AddressSpaceDispatch *d; + + if (qemu_in_vcpu_thread()) { + /* Do not let the guest prolong the critical section as much as it + * as it desires. + * + * Currently, this is prevented by the I/O thread's periodinc kicking + * of the VCPU thread (iothread_requesting_mutex, qemu_cpu_kick_thread) + * but this will go away once TCG's execution moves out of the global + * mutex. + * + * This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which + * only protects cpu->as->dispatch. Since we reload it below, we can + * split the critical section. + */ + rcu_read_unlock(); + rcu_read_lock(); + } + + /* The CPU and TLB are protected by the iothread lock. */ + d = atomic_rcu_read(&cpu->as->dispatch); + cpu->memory_dispatch = d; + tlb_flush(cpu, 1); +} #endif /* Execute a TB, and fix up the CPU state afterwards if necessary */ @@ -352,6 +382,8 @@ int cpu_exec(CPUArchState *env) * an instruction scheduling constraint on modern architectures. */ smp_mb(); + rcu_read_lock(); + if (unlikely(exit_request)) { cpu->exit_request = 1; } @@ -548,6 +580,7 @@ int cpu_exec(CPUArchState *env) } /* for(;;) */ cc->cpu_exec_exit(cpu); + rcu_read_unlock(); /* fail safe : never use current_cpu outside cpu_exec() */ current_cpu = NULL; @@ -361,15 +361,19 @@ static void icount_warp_rt(void *opaque) void qtest_clock_warp(int64_t dest) { int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + AioContext *aio_context; assert(qtest_enabled()); + aio_context = qemu_get_aio_context(); while (clock < dest) { int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); int64_t warp = qemu_soonest_timeout(dest - clock, deadline); + seqlock_write_lock(&timers_state.vm_clock_seqlock); timers_state.qemu_icount_bias += warp; seqlock_write_unlock(&timers_state.vm_clock_seqlock); qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); + timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]); clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); } qemu_clock_notify(QEMU_CLOCK_VIRTUAL); @@ -1104,7 +1108,7 @@ bool qemu_cpu_is_self(CPUState *cpu) return qemu_thread_is_self(cpu->thread); } -static bool qemu_in_vcpu_thread(void) +bool qemu_in_vcpu_thread(void) { return current_cpu && qemu_cpu_is_self(current_cpu); } @@ -243,8 +243,12 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr, } /* Add a new TLB entry. At most one entry for a given virtual address - is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the - supplied size is only used by tlb_flush_page. */ + * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the + * supplied size is only used by tlb_flush_page. + * + * Called from TCG-generated code, which is under an RCU read-side + * critical section. + */ void tlb_set_page(CPUState *cpu, target_ulong vaddr, hwaddr paddr, int prot, int mmu_idx, target_ulong size) @@ -265,8 +269,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, } sz = size; - section = address_space_translate_for_iotlb(cpu->as, paddr, - &xlat, &sz); + section = address_space_translate_for_iotlb(cpu, paddr, &xlat, &sz); assert(sz >= TARGET_PAGE_SIZE); #if defined(DEBUG_TLB) @@ -347,7 +350,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) cpu_ldub_code(env1, addr); } pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK; - mr = iotlb_to_region(cpu->as, pd); + mr = iotlb_to_region(cpu, pd); if (memory_region_is_unassigned(mr)) { CPUClass *cc = CPU_GET_CLASS(cpu); diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index 7671ee278a..b00c2e150e 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -33,6 +33,7 @@ CONFIG_PFLASH_CFI01=y CONFIG_PFLASH_CFI02=y CONFIG_MICRODRIVE=y CONFIG_USB_MUSB=y +CONFIG_USB_EHCI_SYSBUS=y CONFIG_ARM11MPCORE=y CONFIG_A9MPCORE=y diff --git a/docs/memory.txt b/docs/memory.txt index b12f1f049a..2ceb348942 100644 --- a/docs/memory.txt +++ b/docs/memory.txt @@ -73,17 +73,66 @@ stability. Region lifecycle ---------------- -A region is created by one of the constructor functions (memory_region_init*()) -and attached to an object. It is then destroyed by object_unparent() or simply -when the parent object dies. +A region is created by one of the memory_region_init*() functions and +attached to an object, which acts as its owner or parent. QEMU ensures +that the owner object remains alive as long as the region is visible to +the guest, or as long as the region is in use by a virtual CPU or another +device. For example, the owner object will not die between an +address_space_map operation and the corresponding address_space_unmap. -In between, a region can be added to an address space -by using memory_region_add_subregion() and removed using -memory_region_del_subregion(). Destroying the region implicitly -removes the region from the address space. +After creation, a region can be added to an address space or a +container with memory_region_add_subregion(), and removed using +memory_region_del_subregion(). + +Various region attributes (read-only, dirty logging, coalesced mmio, +ioeventfd) can be changed during the region lifecycle. They take effect +as soon as the region is made visible. This can be immediately, later, +or never. + +Destruction of a memory region happens automatically when the owner +object dies. + +If however the memory region is part of a dynamically allocated data +structure, you should call object_unparent() to destroy the memory region +before the data structure is freed. For an example see VFIOMSIXInfo +and VFIOQuirk in hw/vfio/pci.c. + +You must not destroy a memory region as long as it may be in use by a +device or CPU. In order to do this, as a general rule do not create or +destroy memory regions dynamically during a device's lifetime, and only +call object_unparent() in the memory region owner's instance_finalize +callback. The dynamically allocated data structure that contains the +memory region then should obviously be freed in the instance_finalize +callback as well. + +If you break this rule, the following situation can happen: + +- the memory region's owner had a reference taken via memory_region_ref + (for example by address_space_map) + +- the region is unparented, and has no owner anymore + +- when address_space_unmap is called, the reference to the memory region's + owner is leaked. + + +There is an exception to the above rule: it is okay to call +object_unparent at any time for an alias or a container region. It is +therefore also okay to create or destroy alias and container regions +dynamically during a device's lifetime. + +This exceptional usage is valid because aliases and containers only help +QEMU building the guest's memory map; they are never accessed directly. +memory_region_ref and memory_region_unref are never called on aliases +or containers, and the above situation then cannot happen. Exploiting +this exception is rarely necessary, and therefore it is discouraged, +but nevertheless it is used in a few places. + +For regions that "have no owner" (NULL is passed at creation time), the +machine object is actually used as the owner. Since instance_finalize is +never called for the machine object, you must never call object_unparent +on regions that have no owner, unless they are aliases or containers. -Region attributes may be changed at any point; they take effect once -the region becomes exposed to the guest. Overlapping regions and priority -------------------------------- @@ -215,13 +264,6 @@ BAR containing MMIO registers is mapped after it. Note that if the guest maps a BAR outside the PCI hole, it would not be visible as the pci-hole alias clips it to a 0.5GB range. -Attributes ----------- - -Various region attributes (read-only, dirty logging, coalesced mmio, ioeventfd) -can be changed during the region lifecycle. They take effect once the region -is made visible (which can be immediately, later, or never). - MMIO Operations --------------- diff --git a/docs/rcu.txt b/docs/rcu.txt index 61752b93ab..21ecb8106c 100644 --- a/docs/rcu.txt +++ b/docs/rcu.txt @@ -120,12 +120,15 @@ The core RCU API is small: void call_rcu(T *p, void (*func)(T *p), field-name); + void g_free_rcu(T *p, + field-name); - call_rcu1 is typically used through this macro, in the common case - where the "struct rcu_head" is the first field in the struct. In - the above case, one could have written simply: + call_rcu1 is typically used through these macro, in the common case + where the "struct rcu_head" is the first field in the struct. If + the callback function is g_free, in particular, g_free_rcu can be + used. In the above case, one could have written simply: - call_rcu(foo_reclaim, g_free, rcu); + g_free_rcu(foo_reclaim, rcu); typeof(*p) atomic_rcu_read(p); @@ -44,7 +44,7 @@ #include "trace.h" #endif #include "exec/cpu-all.h" - +#include "qemu/rcu_queue.h" #include "exec/cputlb.h" #include "translate-all.h" @@ -58,7 +58,10 @@ #if !defined(CONFIG_USER_ONLY) static bool in_migration; -RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) }; +/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes + * are protected by the ramlist lock. + */ +RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) }; static MemoryRegion *system_memory; static MemoryRegion *system_io; @@ -115,6 +118,8 @@ struct PhysPageEntry { typedef PhysPageEntry Node[P_L2_SIZE]; typedef struct PhysPageMap { + struct rcu_head rcu; + unsigned sections_nb; unsigned sections_nb_alloc; unsigned nodes_nb; @@ -124,6 +129,8 @@ typedef struct PhysPageMap { } PhysPageMap; struct AddressSpaceDispatch { + struct rcu_head rcu; + /* This is a multi-level map on the physical address space. * The bottom level has pointers to MemoryRegionSections. */ @@ -315,6 +322,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr) && mr != &io_mem_watch; } +/* Called from RCU critical section */ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, hwaddr addr, bool resolve_subpage) @@ -330,6 +338,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, return section; } +/* Called from RCU critical section */ static MemoryRegionSection * address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat, hwaddr *plen, bool resolve_subpage) @@ -370,8 +379,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, MemoryRegion *mr; hwaddr len = *plen; + rcu_read_lock(); for (;;) { - section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true); + AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch); + section = address_space_translate_internal(d, addr, &addr, plen, true); mr = section->mr; if (!mr->iommu_ops) { @@ -397,15 +408,18 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, *plen = len; *xlat = addr; + rcu_read_unlock(); return mr; } +/* Called from RCU critical section */ MemoryRegionSection * -address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat, - hwaddr *plen) +address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr, + hwaddr *xlat, hwaddr *plen) { MemoryRegionSection *section; - section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false); + section = address_space_translate_internal(cpu->memory_dispatch, + addr, xlat, plen, false); assert(!section->mr->iommu_ops); return section; @@ -795,16 +809,16 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...) } #if !defined(CONFIG_USER_ONLY) +/* Called from RCU critical section */ static RAMBlock *qemu_get_ram_block(ram_addr_t addr) { RAMBlock *block; - /* The list is protected by the iothread lock here. */ - block = ram_list.mru_block; + block = atomic_rcu_read(&ram_list.mru_block); if (block && addr - block->offset < block->max_length) { goto found; } - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (addr - block->offset < block->max_length) { goto found; } @@ -814,6 +828,22 @@ static RAMBlock *qemu_get_ram_block(ram_addr_t addr) abort(); found: + /* It is safe to write mru_block outside the iothread lock. This + * is what happens: + * + * mru_block = xxx + * rcu_read_unlock() + * xxx removed from list + * rcu_read_lock() + * read mru_block + * mru_block = NULL; + * call_rcu(reclaim_ramblock, xxx); + * rcu_read_unlock() + * + * atomic_rcu_set is not needed here. The block was already published + * when it was placed into the list. Here we're just making an extra + * copy of the pointer. + */ ram_list.mru_block = block; return block; } @@ -827,10 +857,12 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length) end = TARGET_PAGE_ALIGN(start + length); start &= TARGET_PAGE_MASK; + rcu_read_lock(); block = qemu_get_ram_block(start); assert(block == qemu_get_ram_block(end - 1)); start1 = (uintptr_t)ramblock_ptr(block, start - block->offset); cpu_tlb_reset_dirty_all(start1, length); + rcu_read_unlock(); } /* Note: start and end must be within the same ram block. */ @@ -851,6 +883,7 @@ static void cpu_physical_memory_set_dirty_tracking(bool enable) in_migration = enable; } +/* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(CPUState *cpu, MemoryRegionSection *section, target_ulong vaddr, @@ -1162,6 +1195,7 @@ error: } #endif +/* Called with the ramlist lock held. */ static ram_addr_t find_ram_offset(ram_addr_t size) { RAMBlock *block, *next_block; @@ -1169,15 +1203,16 @@ static ram_addr_t find_ram_offset(ram_addr_t size) assert(size != 0); /* it would hand out same offset multiple times */ - if (QTAILQ_EMPTY(&ram_list.blocks)) + if (QLIST_EMPTY_RCU(&ram_list.blocks)) { return 0; + } - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { ram_addr_t end, next = RAM_ADDR_MAX; end = block->offset + block->max_length; - QTAILQ_FOREACH(next_block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) { if (next_block->offset >= end) { next = MIN(next, next_block->offset); } @@ -1202,9 +1237,11 @@ ram_addr_t last_ram_offset(void) RAMBlock *block; ram_addr_t last = 0; - QTAILQ_FOREACH(block, &ram_list.blocks, next) + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { last = MAX(last, block->offset + block->max_length); - + } + rcu_read_unlock(); return last; } @@ -1224,11 +1261,14 @@ static void qemu_ram_setup_dump(void *addr, ram_addr_t size) } } +/* Called within an RCU critical section, or while the ramlist lock + * is held. + */ static RAMBlock *find_ram_block(ram_addr_t addr) { RAMBlock *block; - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (block->offset == addr) { return block; } @@ -1237,11 +1277,13 @@ static RAMBlock *find_ram_block(ram_addr_t addr) return NULL; } +/* Called with iothread lock held. */ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev) { - RAMBlock *new_block = find_ram_block(addr); - RAMBlock *block; + RAMBlock *new_block, *block; + rcu_read_lock(); + new_block = find_ram_block(addr); assert(new_block); assert(!new_block->idstr[0]); @@ -1254,25 +1296,32 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev) } pstrcat(new_block->idstr, sizeof(new_block->idstr), name); - /* This assumes the iothread lock is taken here too. */ - qemu_mutex_lock_ramlist(); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (block != new_block && !strcmp(block->idstr, new_block->idstr)) { fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n", new_block->idstr); abort(); } } - qemu_mutex_unlock_ramlist(); + rcu_read_unlock(); } +/* Called with iothread lock held. */ void qemu_ram_unset_idstr(ram_addr_t addr) { - RAMBlock *block = find_ram_block(addr); + RAMBlock *block; + + /* FIXME: arch_init.c assumes that this is not called throughout + * migration. Ignore the problem since hot-unplug during migration + * does not work anyway. + */ + rcu_read_lock(); + block = find_ram_block(addr); if (block) { memset(block->idstr, 0, sizeof(block->idstr)); } + rcu_read_unlock(); } static int memory_try_enable_merging(void *addr, size_t len) @@ -1331,11 +1380,11 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp) static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) { RAMBlock *block; + RAMBlock *last_block = NULL; ram_addr_t old_ram_size, new_ram_size; old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS; - /* This assumes the iothread lock is taken here too. */ qemu_mutex_lock_ramlist(); new_block->offset = find_ram_offset(new_block->max_length); @@ -1357,19 +1406,27 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) } } - /* Keep the list sorted from biggest to smallest block. */ - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ, + * QLIST (which has an RCU-friendly variant) does not have insertion at + * tail, so save the last element in last_block. + */ + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + last_block = block; if (block->max_length < new_block->max_length) { break; } } if (block) { - QTAILQ_INSERT_BEFORE(block, new_block, next); - } else { - QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next); + QLIST_INSERT_BEFORE_RCU(block, new_block, next); + } else if (last_block) { + QLIST_INSERT_AFTER_RCU(last_block, new_block, next); + } else { /* list is empty */ + QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next); } ram_list.mru_block = NULL; + /* Write list before version */ + smp_wmb(); ram_list.version++; qemu_mutex_unlock_ramlist(); @@ -1377,6 +1434,8 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) if (new_ram_size > old_ram_size) { int i; + + /* ram_list.dirty_memory[] is protected by the iothread lock. */ for (i = 0; i < DIRTY_MEMORY_NUM; i++) { ram_list.dirty_memory[i] = bitmap_zero_extend(ram_list.dirty_memory[i], @@ -1507,49 +1566,55 @@ void qemu_ram_free_from_ptr(ram_addr_t addr) { RAMBlock *block; - /* This assumes the iothread lock is taken here too. */ qemu_mutex_lock_ramlist(); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (addr == block->offset) { - QTAILQ_REMOVE(&ram_list.blocks, block, next); + QLIST_REMOVE_RCU(block, next); ram_list.mru_block = NULL; + /* Write list before version */ + smp_wmb(); ram_list.version++; - g_free(block); + g_free_rcu(block, rcu); break; } } qemu_mutex_unlock_ramlist(); } +static void reclaim_ramblock(RAMBlock *block) +{ + if (block->flags & RAM_PREALLOC) { + ; + } else if (xen_enabled()) { + xen_invalidate_map_cache_entry(block->host); +#ifndef _WIN32 + } else if (block->fd >= 0) { + munmap(block->host, block->max_length); + close(block->fd); +#endif + } else { + qemu_anon_ram_free(block->host, block->max_length); + } + g_free(block); +} + void qemu_ram_free(ram_addr_t addr) { RAMBlock *block; - /* This assumes the iothread lock is taken here too. */ qemu_mutex_lock_ramlist(); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (addr == block->offset) { - QTAILQ_REMOVE(&ram_list.blocks, block, next); + QLIST_REMOVE_RCU(block, next); ram_list.mru_block = NULL; + /* Write list before version */ + smp_wmb(); ram_list.version++; - if (block->flags & RAM_PREALLOC) { - ; - } else if (xen_enabled()) { - xen_invalidate_map_cache_entry(block->host); -#ifndef _WIN32 - } else if (block->fd >= 0) { - munmap(block->host, block->max_length); - close(block->fd); -#endif - } else { - qemu_anon_ram_free(block->host, block->max_length); - } - g_free(block); + call_rcu(block, reclaim_ramblock, rcu); break; } } qemu_mutex_unlock_ramlist(); - } #ifndef _WIN32 @@ -1560,7 +1625,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) int flags; void *area, *vaddr; - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { offset = addr - block->offset; if (offset < block->max_length) { vaddr = ramblock_ptr(block, offset); @@ -1597,7 +1662,6 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) memory_try_enable_merging(vaddr, length); qemu_ram_setup_dump(vaddr, length); } - return; } } } @@ -1605,49 +1669,78 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) int qemu_get_ram_fd(ram_addr_t addr) { - RAMBlock *block = qemu_get_ram_block(addr); + RAMBlock *block; + int fd; - return block->fd; + rcu_read_lock(); + block = qemu_get_ram_block(addr); + fd = block->fd; + rcu_read_unlock(); + return fd; } void *qemu_get_ram_block_host_ptr(ram_addr_t addr) { - RAMBlock *block = qemu_get_ram_block(addr); + RAMBlock *block; + void *ptr; - return ramblock_ptr(block, 0); + rcu_read_lock(); + block = qemu_get_ram_block(addr); + ptr = ramblock_ptr(block, 0); + rcu_read_unlock(); + return ptr; } /* Return a host pointer to ram allocated with qemu_ram_alloc. - With the exception of the softmmu code in this file, this should - only be used for local memory (e.g. video ram) that the device owns, - and knows it isn't going to access beyond the end of the block. - - It should not be used for general purpose DMA. - Use cpu_physical_memory_map/cpu_physical_memory_rw instead. + * This should not be used for general purpose DMA. Use address_space_map + * or address_space_rw instead. For local memory (e.g. video ram) that the + * device owns, use memory_region_get_ram_ptr. + * + * By the time this function returns, the returned pointer is not protected + * by RCU anymore. If the caller is not within an RCU critical section and + * does not hold the iothread lock, it must have other means of protecting the + * pointer, such as a reference to the region that includes the incoming + * ram_addr_t. */ void *qemu_get_ram_ptr(ram_addr_t addr) { - RAMBlock *block = qemu_get_ram_block(addr); + RAMBlock *block; + void *ptr; - if (xen_enabled()) { + rcu_read_lock(); + block = qemu_get_ram_block(addr); + + if (xen_enabled() && block->host == NULL) { /* We need to check if the requested address is in the RAM * because we don't want to map the entire memory in QEMU. * In that case just map until the end of the page. */ if (block->offset == 0) { - return xen_map_cache(addr, 0, 0); - } else if (block->host == NULL) { - block->host = - xen_map_cache(block->offset, block->max_length, 1); + ptr = xen_map_cache(addr, 0, 0); + goto unlock; } + + block->host = xen_map_cache(block->offset, block->max_length, 1); } - return ramblock_ptr(block, addr - block->offset); + ptr = ramblock_ptr(block, addr - block->offset); + +unlock: + rcu_read_unlock(); + return ptr; } /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr - * but takes a size argument */ + * but takes a size argument. + * + * By the time this function returns, the returned pointer is not protected + * by RCU anymore. If the caller is not within an RCU critical section and + * does not hold the iothread lock, it must have other means of protecting the + * pointer, such as a reference to the region that includes the incoming + * ram_addr_t. + */ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size) { + void *ptr; if (*size == 0) { return NULL; } @@ -1655,12 +1748,14 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size) return xen_map_cache(addr, *size, 1); } else { RAMBlock *block; - - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (addr - block->offset < block->max_length) { if (addr - block->offset + *size > block->max_length) *size = block->max_length - addr + block->offset; - return ramblock_ptr(block, addr - block->offset); + ptr = ramblock_ptr(block, addr - block->offset); + rcu_read_unlock(); + return ptr; } } @@ -1670,23 +1765,35 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size) } /* Some of the softmmu routines need to translate from a host pointer - (typically a TLB entry) back to a ram offset. */ + * (typically a TLB entry) back to a ram offset. + * + * By the time this function returns, the returned pointer is not protected + * by RCU anymore. If the caller is not within an RCU critical section and + * does not hold the iothread lock, it must have other means of protecting the + * pointer, such as a reference to the region that includes the incoming + * ram_addr_t. + */ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) { RAMBlock *block; uint8_t *host = ptr; + MemoryRegion *mr; if (xen_enabled()) { + rcu_read_lock(); *ram_addr = xen_ram_addr_from_mapcache(ptr); - return qemu_get_ram_block(*ram_addr)->mr; + mr = qemu_get_ram_block(*ram_addr)->mr; + rcu_read_unlock(); + return mr; } - block = ram_list.mru_block; + rcu_read_lock(); + block = atomic_rcu_read(&ram_list.mru_block); if (block && block->host && host - block->host < block->max_length) { goto found; } - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { /* This case append when the block is not mapped. */ if (block->host == NULL) { continue; @@ -1696,11 +1803,14 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) } } + rcu_read_unlock(); return NULL; found: *ram_addr = block->offset + (host - block->host); - return block->mr; + mr = block->mr; + rcu_read_unlock(); + return mr; } static void notdirty_mem_write(void *opaque, hwaddr ram_addr, @@ -1961,9 +2071,12 @@ static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as, return phys_section_add(map, §ion); } -MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index) +MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index) { - return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr; + AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch); + MemoryRegionSection *sections = d->map.sections; + + return sections[index & ~TARGET_PAGE_MASK].mr; } static void io_mem_init(void) @@ -1997,6 +2110,12 @@ static void mem_begin(MemoryListener *listener) as->next_dispatch = d; } +static void address_space_dispatch_free(AddressSpaceDispatch *d) +{ + phys_sections_free(&d->map); + g_free(d); +} + static void mem_commit(MemoryListener *listener) { AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener); @@ -2005,11 +2124,9 @@ static void mem_commit(MemoryListener *listener) phys_page_compact_all(next, next->map.nodes_nb); - as->dispatch = next; - + atomic_rcu_set(&as->dispatch, next); if (cur) { - phys_sections_free(&cur->map); - g_free(cur); + call_rcu(cur, address_space_dispatch_free, rcu); } } @@ -2026,7 +2143,7 @@ static void tcg_commit(MemoryListener *listener) if (cpu->tcg_as_listener != listener) { continue; } - tlb_flush(cpu, 1); + cpu_reload_memory_map(cpu); } } @@ -2068,8 +2185,10 @@ void address_space_destroy_dispatch(AddressSpace *as) { AddressSpaceDispatch *d = as->dispatch; - g_free(d); - as->dispatch = NULL; + atomic_rcu_set(&as->dispatch, NULL); + if (d) { + call_rcu(d, address_space_dispatch_free, rcu); + } } static void memory_map_init(void) @@ -2948,8 +3067,10 @@ void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) { RAMBlock *block; - QTAILQ_FOREACH(block, &ram_list.blocks, next) { + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { func(block->host, block->offset, block->used_length, opaque); } + rcu_read_unlock(); } #endif @@ -16,6 +16,7 @@ #include "hmp.h" #include "net/net.h" #include "sysemu/char.h" +#include "sysemu/block-backend.h" #include "qemu/option.h" #include "qemu/timer.h" #include "qmp-commands.h" @@ -1720,14 +1721,14 @@ void hmp_chardev_remove(Monitor *mon, const QDict *qdict) void hmp_qemu_io(Monitor *mon, const QDict *qdict) { - BlockDriverState *bs; + BlockBackend *blk; const char* device = qdict_get_str(qdict, "device"); const char* command = qdict_get_str(qdict, "command"); Error *err = NULL; - bs = bdrv_find(device); - if (bs) { - qemuio_command(bs, command); + blk = blk_by_name(device); + if (blk) { + qemuio_command(blk, command); } else { error_set(&err, QERR_DEVICE_NOT_FOUND, device); } diff --git a/hw/9pfs/virtio-9p-synth.c b/hw/9pfs/virtio-9p-synth.c index e75aa8772e..a0ab9a86a9 100644 --- a/hw/9pfs/virtio-9p-synth.c +++ b/hw/9pfs/virtio-9p-synth.c @@ -18,7 +18,7 @@ #include "fsdev/qemu-fsdev.h" #include "virtio-9p-synth.h" #include "qemu/rcu.h" - +#include "qemu/rcu_queue.h" #include <sys/stat.h> /* Root node for synth file system */ diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index 53100061d2..62af946105 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -920,7 +920,7 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus, { qemu_irq isa_pci_irq, *isa_irqs; - *isa_bus = isa_bus_new(NULL, &s->pchip.reg_io); + *isa_bus = isa_bus_new(NULL, get_system_memory(), &s->pchip.reg_io); isa_pci_irq = *qemu_allocate_irqs(typhoon_set_isa_irq, s, 1); isa_irqs = i8259_init(*isa_bus, isa_pci_irq); isa_bus_irqs(*isa_bus, isa_irqs); diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index be957d1117..cd41478b08 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -16,7 +16,9 @@ #include "qemu/iov.h" #include "qemu/thread.h" #include "qemu/error-report.h" +#include "hw/virtio/virtio-access.h" #include "hw/virtio/dataplane/vring.h" +#include "hw/virtio/dataplane/vring-accessors.h" #include "sysemu/block-backend.h" #include "hw/virtio/virtio-blk.h" #include "virtio-blk.h" @@ -75,7 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) VirtIOBlockDataPlane *s = req->dev->dataplane; stb_p(&req->in->status, status); - vring_push(&req->dev->dataplane->vring, &req->elem, + vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->qiov.size + sizeof(*req->in)); /* Suppress notification to guest by BH and its scheduled diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c index 21842a01e7..267d8a8c70 100644 --- a/hw/block/xen_disk.c +++ b/hw/block/xen_disk.c @@ -40,6 +40,8 @@ #include "xen_blkif.h" #include "sysemu/blockdev.h" #include "sysemu/block-backend.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" /* ------------------------------------------------------------- */ @@ -897,30 +899,23 @@ static int blk_connect(struct XenDevice *xendev) blkdev->dinfo = drive_get(IF_XEN, 0, index); if (!blkdev->dinfo) { Error *local_err = NULL; - BlockBackend *blk; - BlockDriver *drv; - BlockDriverState *bs; + QDict *options = NULL; - /* setup via xenbus -> create new block driver instance */ - xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n"); - blk = blk_new_with_bs(blkdev->dev, NULL); - if (!blk) { - return -1; + if (strcmp(blkdev->fileproto, "<unset>")) { + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str(blkdev->fileproto)); } - blkdev->blk = blk; - bs = blk_bs(blk); - drv = bdrv_find_whitelisted_format(blkdev->fileproto, readonly); - if (bdrv_open(&bs, blkdev->filename, NULL, NULL, qflags, - drv, &local_err) != 0) { + /* setup via xenbus -> create new block driver instance */ + xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n"); + blkdev->blk = blk_new_open(blkdev->dev, blkdev->filename, NULL, options, + qflags, &local_err); + if (!blkdev->blk) { xen_be_printf(&blkdev->xendev, 0, "error: %s\n", error_get_pretty(local_err)); error_free(local_err); - blk_unref(blk); - blkdev->blk = NULL; return -1; } - assert(bs == blk_bs(blk)); } else { /* setup via qemu cmdline -> already setup for us */ xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n"); diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 2eacac0787..44c6b93727 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -818,6 +818,13 @@ static char *qdev_get_fw_dev_path_from_handler(BusState *bus, DeviceState *dev) return d; } +char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev) +{ + Object *obj = OBJECT(dev); + + return fw_path_provider_try_get_dev_path(obj, bus, dev); +} + static int qdev_get_fw_dev_path_helper(DeviceState *dev, char *p, int size) { int l = 0; diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c index 3a53f20392..ec923c8c4b 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -2907,7 +2907,7 @@ static void cirrus_init_common(CirrusVGAState *s, Object *owner, bank, 1); } memory_region_add_subregion_overlap(system_memory, - isa_mem_base + 0x000a0000, + 0x000a0000, &s->low_mem_container, 1); memory_region_set_coalescing(&s->low_mem); diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index 2b480bd44d..7f3c98941b 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c @@ -64,7 +64,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) isa_register_portio_list(isadev, 0x1ce, vbe_ports, s, "vbe"); } memory_region_add_subregion_overlap(isa_address_space(isadev), - isa_mem_base + 0x000a0000, + 0x000a0000, vga_io_memory, 1); memory_region_set_coalescing(vga_io_memory); s->con = graphic_console_init(DEVICE(dev), 0, s->hw_ops, s); diff --git a/hw/display/vga.c b/hw/display/vga.c index 9c62fbf488..c8c49abc6e 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -177,7 +177,6 @@ static void vga_update_memory_access(VGACommonState *s) size = 0x8000; break; } - base += isa_mem_base; memory_region_init_alias(&s->chain4_alias, memory_region_owner(&s->vram), "vga.chain4", &s->vram, offset, size); memory_region_add_subregion_overlap(s->legacy_address_space, base, @@ -2218,7 +2217,7 @@ void vga_init(VGACommonState *s, Object *obj, MemoryRegion *address_space, vga_io_memory = vga_init_io(s, obj, &vga_ports, &vbe_ports); memory_region_add_subregion_overlap(address_space, - isa_mem_base + 0x000a0000, + 0x000a0000, vga_io_memory, 1); memory_region_set_coalescing(vga_io_memory); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 0a4282adf3..7da70ff349 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -745,6 +745,9 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) /* Map dev to context-entry then do a paging-structures walk to do a iommu * translation. + * + * Called from RCU critical section. + * * @bus_num: The bus number * @devfn: The devfn, which is the combined of device and function number * @is_write: The access is a write operation diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 38b42b05f8..de75cf0e87 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -208,7 +208,7 @@ static void pc_init1(MachineState *machine, } else { pci_bus = NULL; i440fx_state = NULL; - isa_bus = isa_bus_new(NULL, system_io); + isa_bus = isa_bus_new(NULL, get_system_memory(), system_io); no_hpet = 1; } isa_bus_irqs(isa_bus, gsi); diff --git a/hw/isa/i82378.c b/hw/isa/i82378.c index a7d9aa6da1..0dc440df5c 100644 --- a/hw/isa/i82378.c +++ b/hw/isa/i82378.c @@ -75,7 +75,8 @@ static int i82378_initfn(PCIDevice *pci) pci_config_set_interrupt_pin(pci_conf, 1); /* interrupt pin 0 */ - isabus = isa_bus_new(dev, pci_address_space_io(pci)); + isabus = isa_bus_new(dev, get_system_memory(), + pci_address_space_io(pci)); /* This device has: 2 82C59 (irq) diff --git a/hw/isa/isa-bus.c b/hw/isa/isa-bus.c index cc85e538b1..825aa627df 100644 --- a/hw/isa/isa-bus.c +++ b/hw/isa/isa-bus.c @@ -21,10 +21,8 @@ #include "hw/sysbus.h" #include "sysemu/sysemu.h" #include "hw/isa/isa.h" -#include "exec/address-spaces.h" static ISABus *isabus; -hwaddr isa_mem_base = 0; static void isabus_dev_print(Monitor *mon, DeviceState *dev, int indent); static char *isabus_get_fw_dev_path(DeviceState *dev); @@ -44,7 +42,8 @@ static const TypeInfo isa_bus_info = { .class_init = isa_bus_class_init, }; -ISABus *isa_bus_new(DeviceState *dev, MemoryRegion *address_space_io) +ISABus *isa_bus_new(DeviceState *dev, MemoryRegion* address_space, + MemoryRegion *address_space_io) { if (isabus) { fprintf(stderr, "Can't create a second ISA bus\n"); @@ -56,6 +55,7 @@ ISABus *isa_bus_new(DeviceState *dev, MemoryRegion *address_space_io) } isabus = ISA_BUS(qbus_create(TYPE_ISA_BUS, dev, NULL)); + isabus->address_space = address_space; isabus->address_space_io = address_space_io; return isabus; } @@ -250,7 +250,11 @@ static char *isabus_get_fw_dev_path(DeviceState *dev) MemoryRegion *isa_address_space(ISADevice *dev) { - return get_system_memory(); + if (dev) { + return isa_bus_from_device(dev)->address_space; + } + + return isabus->address_space; } MemoryRegion *isa_address_space_io(ISADevice *dev) diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 530b074551..231de74414 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -575,7 +575,7 @@ static int ich9_lpc_init(PCIDevice *d) ICH9LPCState *lpc = ICH9_LPC_DEVICE(d); ISABus *isa_bus; - isa_bus = isa_bus_new(&d->qdev, get_system_io()); + isa_bus = isa_bus_new(DEVICE(d), get_system_memory(), get_system_io()); pci_set_long(d->wmask + ICH9_LPC_PMBASE, ICH9_LPC_PMBASE_BASE_ADDRESS_MASK); diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c index 1aa17d7cf6..a9916df20a 100644 --- a/hw/isa/piix4.c +++ b/hw/isa/piix4.c @@ -86,7 +86,8 @@ static int piix4_initfn(PCIDevice *dev) { PIIX4State *d = DO_UPCAST(PIIX4State, dev, dev); - isa_bus_new(&d->dev.qdev, pci_address_space_io(dev)); + isa_bus_new(DEVICE(d), pci_address_space(dev), + pci_address_space_io(dev)); piix4_dev = &d->dev; qemu_register_reset(piix4_reset, d); return 0; diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c index 17510ce528..b223526bde 100644 --- a/hw/isa/vt82c686.c +++ b/hw/isa/vt82c686.c @@ -429,7 +429,8 @@ static int vt82c686b_initfn(PCIDevice *d) uint8_t *wmask; int i; - isa_bus = isa_bus_new(&d->qdev, pci_address_space_io(d)); + isa_bus = isa_bus_new(DEVICE(d), get_system_memory(), + pci_address_space_io(d)); pci_conf = d->config; pci_config_set_prog_interface(pci_conf, 0x0); diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c index 1f2fe5fab9..10fcca33f8 100644 --- a/hw/mips/gt64xxx_pci.c +++ b/hw/mips/gt64xxx_pci.c @@ -239,7 +239,11 @@ typedef struct GT64120State { uint32_t regs[GT_REGS]; PCI_MAPPING_ENTRY(PCI0IO); + PCI_MAPPING_ENTRY(PCI0M0); + PCI_MAPPING_ENTRY(PCI0M1); PCI_MAPPING_ENTRY(ISD); + MemoryRegion pci0_mem; + AddressSpace pci0_mem_as; } GT64120State; /* Adjust range to avoid touching space which isn't mappable via PCI */ @@ -290,25 +294,63 @@ static void gt64120_isd_mapping(GT64120State *s) static void gt64120_pci_mapping(GT64120State *s) { - /* Update IO mapping */ - if ((s->regs[GT_PCI0IOLD] & 0x7f) <= s->regs[GT_PCI0IOHD]) - { - /* Unmap old IO address */ - if (s->PCI0IO_length) - { - memory_region_del_subregion(get_system_memory(), &s->PCI0IO_mem); - object_unparent(OBJECT(&s->PCI0IO_mem)); - } - /* Map new IO address */ - s->PCI0IO_start = s->regs[GT_PCI0IOLD] << 21; - s->PCI0IO_length = ((s->regs[GT_PCI0IOHD] + 1) - (s->regs[GT_PCI0IOLD] & 0x7f)) << 21; - isa_mem_base = s->PCI0IO_start; - if (s->PCI0IO_length) { - memory_region_init_alias(&s->PCI0IO_mem, OBJECT(s), "isa_mmio", - get_system_io(), 0, s->PCI0IO_length); - memory_region_add_subregion(get_system_memory(), s->PCI0IO_start, - &s->PCI0IO_mem); - } + /* Update PCI0IO mapping */ + if ((s->regs[GT_PCI0IOLD] & 0x7f) <= s->regs[GT_PCI0IOHD]) { + /* Unmap old IO address */ + if (s->PCI0IO_length) { + memory_region_del_subregion(get_system_memory(), &s->PCI0IO_mem); + object_unparent(OBJECT(&s->PCI0IO_mem)); + } + /* Map new IO address */ + s->PCI0IO_start = s->regs[GT_PCI0IOLD] << 21; + s->PCI0IO_length = ((s->regs[GT_PCI0IOHD] + 1) - + (s->regs[GT_PCI0IOLD] & 0x7f)) << 21; + if (s->PCI0IO_length) { + memory_region_init_alias(&s->PCI0IO_mem, OBJECT(s), "pci0-io", + get_system_io(), 0, s->PCI0IO_length); + memory_region_add_subregion(get_system_memory(), s->PCI0IO_start, + &s->PCI0IO_mem); + } + } + + /* Update PCI0M0 mapping */ + if ((s->regs[GT_PCI0M0LD] & 0x7f) <= s->regs[GT_PCI0M0HD]) { + /* Unmap old MEM address */ + if (s->PCI0M0_length) { + memory_region_del_subregion(get_system_memory(), &s->PCI0M0_mem); + object_unparent(OBJECT(&s->PCI0M0_mem)); + } + /* Map new mem address */ + s->PCI0M0_start = s->regs[GT_PCI0M0LD] << 21; + s->PCI0M0_length = ((s->regs[GT_PCI0M0HD] + 1) - + (s->regs[GT_PCI0M0LD] & 0x7f)) << 21; + if (s->PCI0M0_length) { + memory_region_init_alias(&s->PCI0M0_mem, OBJECT(s), "pci0-mem0", + &s->pci0_mem, s->PCI0M0_start, + s->PCI0M0_length); + memory_region_add_subregion(get_system_memory(), s->PCI0M0_start, + &s->PCI0M0_mem); + } + } + + /* Update PCI0M1 mapping */ + if ((s->regs[GT_PCI0M1LD] & 0x7f) <= s->regs[GT_PCI0M1HD]) { + /* Unmap old MEM address */ + if (s->PCI0M1_length) { + memory_region_del_subregion(get_system_memory(), &s->PCI0M1_mem); + object_unparent(OBJECT(&s->PCI0M1_mem)); + } + /* Map new mem address */ + s->PCI0M1_start = s->regs[GT_PCI0M1LD] << 21; + s->PCI0M1_length = ((s->regs[GT_PCI0M1HD] + 1) - + (s->regs[GT_PCI0M1LD] & 0x7f)) << 21; + if (s->PCI0M1_length) { + memory_region_init_alias(&s->PCI0M1_mem, OBJECT(s), "pci0-mem1", + &s->pci0_mem, s->PCI0M1_start, + s->PCI0M1_length); + memory_region_add_subregion(get_system_memory(), s->PCI0M1_start, + &s->PCI0M1_mem); + } } } @@ -363,10 +405,12 @@ static void gt64120_writel (void *opaque, hwaddr addr, case GT_PCI0M0LD: s->regs[GT_PCI0M0LD] = val & 0x00007fff; s->regs[GT_PCI0M0REMAP] = val & 0x000007ff; + gt64120_pci_mapping(s); break; case GT_PCI0M1LD: s->regs[GT_PCI0M1LD] = val & 0x00007fff; s->regs[GT_PCI0M1REMAP] = val & 0x000007ff; + gt64120_pci_mapping(s); break; case GT_PCI1IOLD: s->regs[GT_PCI1IOLD] = val & 0x00007fff; @@ -380,12 +424,12 @@ static void gt64120_writel (void *opaque, hwaddr addr, s->regs[GT_PCI1M1LD] = val & 0x00007fff; s->regs[GT_PCI1M1REMAP] = val & 0x000007ff; break; + case GT_PCI0M0HD: + case GT_PCI0M1HD: case GT_PCI0IOHD: s->regs[saddr] = val & 0x0000007f; gt64120_pci_mapping(s); break; - case GT_PCI0M0HD: - case GT_PCI0M1HD: case GT_PCI1IOHD: case GT_PCI1M0HD: case GT_PCI1M1HD: @@ -1124,10 +1168,12 @@ PCIBus *gt64120_register(qemu_irq *pic) qdev_init_nofail(dev); d = GT64120_PCI_HOST_BRIDGE(dev); phb = PCI_HOST_BRIDGE(dev); + memory_region_init(&d->pci0_mem, OBJECT(dev), "pci0-mem", UINT32_MAX); + address_space_init(&d->pci0_mem_as, &d->pci0_mem, "pci0-mem"); phb->bus = pci_register_bus(dev, "pci", gt64120_pci_set_irq, gt64120_pci_map_irq, pic, - get_system_memory(), + &d->pci0_mem, get_system_io(), PCI_DEVFN(18, 0), 4, TYPE_PCI_BUS); memory_region_init_io(&d->ISD_mem, OBJECT(dev), &isd_mem_ops, d, "isd-mem", 0x1000); @@ -1142,11 +1188,6 @@ static int gt64120_init(SysBusDevice *dev) s = GT64120_PCI_HOST_BRIDGE(dev); - /* FIXME: This value is computed from registers during reset, but some - devices (e.g. VGA card) need to know it when they are registered. - This also mean that changing the register to change the mapping - does not fully work. */ - isa_mem_base = 0x10000000; qemu_register_reset(gt64120_reset, s); return 0; } diff --git a/hw/mips/mips_jazz.c b/hw/mips/mips_jazz.c index 3f33093fd9..ef5dd7d5ab 100644 --- a/hw/mips/mips_jazz.c +++ b/hw/mips/mips_jazz.c @@ -60,13 +60,16 @@ static void main_cpu_reset(void *opaque) static uint64_t rtc_read(void *opaque, hwaddr addr, unsigned size) { - return cpu_inw(0x71); + uint8_t val; + address_space_read(&address_space_memory, 0x90000071, &val, 1); + return val; } static void rtc_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { - cpu_outw(0x71, val & 0xff); + uint8_t buf = val & 0xff; + address_space_write(&address_space_memory, 0x90000071, &buf, 1); } static const MemoryRegionOps rtc_ops = { @@ -120,12 +123,11 @@ static void mips_jazz_do_unassigned_access(CPUState *cpu, hwaddr addr, (*real_do_unassigned_access)(cpu, addr, is_write, is_exec, opaque, size); } -static void mips_jazz_init(MemoryRegion *address_space, - MemoryRegion *address_space_io, - ram_addr_t ram_size, - const char *cpu_model, +static void mips_jazz_init(MachineState *machine, enum jazz_model_e jazz_model) { + MemoryRegion *address_space = get_system_memory(); + const char *cpu_model = machine->cpu_model; char *filename; int bios_size, n; MIPSCPU *cpu; @@ -134,7 +136,8 @@ static void mips_jazz_init(MemoryRegion *address_space, qemu_irq *rc4030, *i8259; rc4030_dma *dmas; void* rc4030_opaque; - MemoryRegion *isa = g_new(MemoryRegion, 1); + MemoryRegion *isa_mem = g_new(MemoryRegion, 1); + MemoryRegion *isa_io = g_new(MemoryRegion, 1); MemoryRegion *rtc = g_new(MemoryRegion, 1); MemoryRegion *i8042 = g_new(MemoryRegion, 1); MemoryRegion *dma_dummy = g_new(MemoryRegion, 1); @@ -179,7 +182,8 @@ static void mips_jazz_init(MemoryRegion *address_space, cc->do_unassigned_access = mips_jazz_do_unassigned_access; /* allocate RAM */ - memory_region_init_ram(ram, NULL, "mips_jazz.ram", ram_size, &error_abort); + memory_region_init_ram(ram, NULL, "mips_jazz.ram", machine->ram_size, + &error_abort); vmstate_register_ram_global(ram); memory_region_add_subregion(address_space, 0, ram); @@ -218,8 +222,14 @@ static void mips_jazz_init(MemoryRegion *address_space, memory_region_init_io(dma_dummy, NULL, &dma_dummy_ops, NULL, "dummy_dma", 0x1000); memory_region_add_subregion(address_space, 0x8000d000, dma_dummy); + /* ISA bus: IO space at 0x90000000, mem space at 0x91000000 */ + memory_region_init(isa_io, NULL, "isa-io", 0x00010000); + memory_region_init(isa_mem, NULL, "isa-mem", 0x01000000); + memory_region_add_subregion(address_space, 0x90000000, isa_io); + memory_region_add_subregion(address_space, 0x91000000, isa_mem); + isa_bus = isa_bus_new(NULL, isa_mem, isa_io); + /* ISA devices */ - isa_bus = isa_bus_new(NULL, address_space_io); i8259 = i8259_init(isa_bus, env->irq[4]); isa_bus_irqs(isa_bus, i8259); cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1); @@ -227,12 +237,6 @@ static void mips_jazz_init(MemoryRegion *address_space, pit = pit_init(isa_bus, 0x40, 0, NULL); pcspk_init(isa_bus, pit); - /* ISA IO space at 0x90000000 */ - memory_region_init_alias(isa, NULL, "isa_mmio", - get_system_io(), 0, 0x01000000); - memory_region_add_subregion(address_space, 0x90000000, isa); - isa_mem_base = 0x11000000; - /* Video card */ switch (jazz_model) { case JAZZ_MAGNUM: @@ -333,19 +337,13 @@ static void mips_jazz_init(MemoryRegion *address_space, static void mips_magnum_init(MachineState *machine) { - ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; - mips_jazz_init(get_system_memory(), get_system_io(), - ram_size, cpu_model, JAZZ_MAGNUM); + mips_jazz_init(machine, JAZZ_MAGNUM); } static void mips_pica61_init(MachineState *machine) { - ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; - mips_jazz_init(get_system_memory(), get_system_io(), - ram_size, cpu_model, JAZZ_PICA61); + mips_jazz_init(machine, JAZZ_PICA61); } static QEMUMachine mips_magnum_machine = { diff --git a/hw/mips/mips_r4k.c b/hw/mips/mips_r4k.c index a7fe0ceadf..3e90e273dc 100644 --- a/hw/mips/mips_r4k.c +++ b/hw/mips/mips_r4k.c @@ -165,7 +165,8 @@ void mips_r4k_init(MachineState *machine) MemoryRegion *ram = g_new(MemoryRegion, 1); MemoryRegion *bios; MemoryRegion *iomem = g_new(MemoryRegion, 1); - MemoryRegion *isa = g_new(MemoryRegion, 1); + MemoryRegion *isa_io = g_new(MemoryRegion, 1); + MemoryRegion *isa_mem = g_new(MemoryRegion, 1); int bios_size; MIPSCPU *cpu; CPUMIPSState *env; @@ -267,20 +268,20 @@ void mips_r4k_init(MachineState *machine) cpu_mips_irq_init_cpu(env); cpu_mips_clock_init(env); + /* ISA bus: IO space at 0x14000000, mem space at 0x10000000 */ + memory_region_init_alias(isa_io, NULL, "isa-io", + get_system_io(), 0, 0x00010000); + memory_region_init(isa_mem, NULL, "isa-mem", 0x01000000); + memory_region_add_subregion(get_system_memory(), 0x14000000, isa_io); + memory_region_add_subregion(get_system_memory(), 0x10000000, isa_mem); + isa_bus = isa_bus_new(NULL, isa_mem, get_system_io()); + /* The PIC is attached to the MIPS CPU INT0 pin */ - isa_bus = isa_bus_new(NULL, get_system_io()); i8259 = i8259_init(isa_bus, env->irq[2]); isa_bus_irqs(isa_bus, i8259); rtc_init(isa_bus, 2000, NULL); - /* Register 64 KB of ISA IO space at 0x14000000 */ - memory_region_init_alias(isa, NULL, "isa_mmio", - get_system_io(), 0, 0x00010000); - memory_region_add_subregion(get_system_memory(), 0x14000000, isa); - - isa_mem_base = 0x10000000; - pit = pit_init(isa_bus, 0x40, 0, NULL); for(i = 0; i < MAX_SERIAL_PORTS; i++) { diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c index 252ea5eb53..36f73e1f8b 100644 --- a/hw/pci-bridge/pci_bridge_dev.c +++ b/hw/pci-bridge/pci_bridge_dev.c @@ -97,6 +97,11 @@ static void pci_bridge_dev_exitfn(PCIDevice *dev) pci_bridge_exitfn(dev); } +static void pci_bridge_dev_instance_finalize(Object *obj) +{ + shpc_free(PCI_DEVICE(obj)); +} + static void pci_bridge_dev_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { @@ -154,10 +159,11 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data) } static const TypeInfo pci_bridge_dev_info = { - .name = TYPE_PCI_BRIDGE_DEV, - .parent = TYPE_PCI_BRIDGE, - .instance_size = sizeof(PCIBridgeDev), - .class_init = pci_bridge_dev_class_init, + .name = TYPE_PCI_BRIDGE_DEV, + .parent = TYPE_PCI_BRIDGE, + .instance_size = sizeof(PCIBridgeDev), + .class_init = pci_bridge_dev_class_init, + .instance_finalize = pci_bridge_dev_instance_finalize, .interfaces = (InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, { } diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index f573875baf..832b6c7248 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -205,6 +205,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &is->iommu_as; } +/* Called from RCU critical section */ static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) { diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 1530038cb0..8ea718e18e 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -635,7 +635,8 @@ static int piix3_initfn(PCIDevice *dev) { PIIX3State *d = DO_UPCAST(PIIX3State, dev, dev); - isa_bus_new(DEVICE(d), pci_address_space_io(dev)); + isa_bus_new(DEVICE(d), get_system_memory(), + pci_address_space_io(dev)); memory_region_init_io(&d->rcr_mem, OBJECT(dev), &rcr_ops, d, "piix3-reset-control", 1); diff --git a/hw/pci/pcie_host.c b/hw/pci/pcie_host.c index dfb4a2b505..d8afba863e 100644 --- a/hw/pci/pcie_host.c +++ b/hw/pci/pcie_host.c @@ -88,6 +88,8 @@ static void pcie_host_init(Object *obj) PCIExpressHost *e = PCIE_HOST_BRIDGE(obj); e->base_addr = PCIE_BASE_ADDR_UNMAPPED; + memory_region_init_io(&e->mmio, OBJECT(e), &pcie_mmcfg_ops, e, "pcie-mmcfg-mmio", + PCIE_MMCFG_SIZE_MAX); } void pcie_host_mmcfg_unmap(PCIExpressHost *e) @@ -104,8 +106,7 @@ void pcie_host_mmcfg_init(PCIExpressHost *e, uint32_t size) assert(size >= PCIE_MMCFG_SIZE_MIN); assert(size <= PCIE_MMCFG_SIZE_MAX); e->size = size; - memory_region_init_io(&e->mmio, OBJECT(e), &pcie_mmcfg_ops, e, - "pcie-mmcfg", e->size); + memory_region_set_size(&e->mmio, e->size); } void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr, @@ -121,10 +122,12 @@ void pcie_host_mmcfg_update(PCIExpressHost *e, hwaddr addr, uint32_t size) { + memory_region_transaction_begin(); pcie_host_mmcfg_unmap(e); if (enable) { pcie_host_mmcfg_map(e, addr, size); } + memory_region_transaction_commit(); } static const TypeInfo pcie_host_type_info = { diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 27c496e8c3..5fd7f4bbb7 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -663,13 +663,22 @@ void shpc_cleanup(PCIDevice *d, MemoryRegion *bar) SHPCDevice *shpc = d->shpc; d->cap_present &= ~QEMU_PCI_CAP_SHPC; memory_region_del_subregion(bar, &shpc->mmio); - object_unparent(OBJECT(&shpc->mmio)); /* TODO: cleanup config space changes? */ +} + +void shpc_free(PCIDevice *d) +{ + SHPCDevice *shpc = d->shpc; + if (!shpc) { + return; + } + object_unparent(OBJECT(&shpc->mmio)); g_free(shpc->config); g_free(shpc->cmask); g_free(shpc->wmask); g_free(shpc->w1cmask); g_free(shpc); + d->shpc = NULL; } void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l) diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index c3770121e2..624b4ab50b 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -420,11 +420,14 @@ static void ppc_core99_init(MachineState *machine) if (machine->usb) { pci_create_simple(pci_bus, -1, "pci-ohci"); + /* U3 needs to use USB for input because Linux doesn't support via-cuda on PPC64 */ if (machine_arch == ARCH_MAC99_U3) { - usbdevice_create("keyboard"); - usbdevice_create("mouse"); + USBBus *usb_bus = usb_bus_find(-1); + + usb_create_simple(usb_bus, "usb-kbd"); + usb_create_simple(usb_bus, "usb-mouse"); } } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 812d03054d..a82a0f99b3 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1533,9 +1533,12 @@ static void ppc_spapr_init(MachineState *machine) if (machine->usb) { pci_create_simple(phb->bus, -1, "pci-ohci"); + if (spapr->has_graphics) { - usbdevice_create("keyboard"); - usbdevice_create("mouse"); + USBBus *usb_bus = usb_bus_find(-1); + + usb_create_simple(usb_bus, "usb-kbd"); + usb_create_simple(usb_bus, "usb-mouse"); } } diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index da474740c0..ba003da39e 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -59,6 +59,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) return NULL; } +/* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) { diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index dcb2bc5a6e..e30ff84c0c 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -24,6 +24,7 @@ #include "hw/virtio/virtio-scsi.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" +#include "hw/fw-path-provider.h" /* Features supported by host kernel. */ static const int kernel_feature_bits[] = { @@ -250,6 +251,12 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) return; } + /* At present, channel and lun both are 0 for bootable vhost-scsi disk */ + s->channel = 0; + s->lun = 0; + /* Note: we can also get the minimum tpgt from kernel */ + s->target = vs->conf.boot_tpgt; + error_setg(&s->migration_blocker, "vhost-scsi does not support migration"); migrate_add_blocker(s->migration_blocker); @@ -271,6 +278,19 @@ static void vhost_scsi_unrealize(DeviceState *dev, Error **errp) virtio_scsi_common_unrealize(dev, errp); } +/* + * Implementation of an interface to adjust firmware path + * for the bootindex property handling. + */ +static char *vhost_scsi_get_fw_dev_path(FWPathProvider *p, BusState *bus, + DeviceState *dev) +{ + VHostSCSI *s = VHOST_SCSI(dev); + /* format: channel@channel/vhost-scsi@target,lun */ + return g_strdup_printf("channel@%x/%s@%x,%x", s->channel, + qdev_fw_name(dev), s->target, s->lun); +} + static Property vhost_scsi_properties[] = { DEFINE_VHOST_SCSI_PROPERTIES(VHostSCSI, parent_obj.conf), DEFINE_PROP_END_OF_LIST(), @@ -280,6 +300,7 @@ static void vhost_scsi_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(klass); dc->props = vhost_scsi_properties; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -288,6 +309,15 @@ static void vhost_scsi_class_init(ObjectClass *klass, void *data) vdc->get_features = vhost_scsi_get_features; vdc->set_config = vhost_scsi_set_config; vdc->set_status = vhost_scsi_set_status; + fwc->get_dev_path = vhost_scsi_get_fw_dev_path; +} + +static void vhost_scsi_instance_init(Object *obj) +{ + VHostSCSI *dev = VHOST_SCSI(obj); + + device_add_bootindex_property(obj, &dev->bootindex, "bootindex", NULL, + DEVICE(dev), NULL); } static const TypeInfo vhost_scsi_info = { @@ -295,6 +325,11 @@ static const TypeInfo vhost_scsi_info = { .parent = TYPE_VIRTIO_SCSI_COMMON, .instance_size = sizeof(VHostSCSI), .class_init = vhost_scsi_class_init, + .instance_init = vhost_scsi_instance_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_FW_PATH_PROVIDER }, + { } + }, }; static void virtio_register_types(void) diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 03a1e8cfcf..418d73b1b4 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -94,7 +94,7 @@ void virtio_scsi_vring_push_notify(VirtIOSCSIReq *req) { VirtIODevice *vdev = VIRTIO_DEVICE(req->vring->parent); - vring_push(&req->vring->vring, &req->elem, + vring_push(vdev, &req->vring->vring, &req->elem, req->qsgl.size + req->resp_iov.size); if (vring_should_notify(vdev, &req->vring->vring)) { diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c index 12f44d28f0..d1d0847ba2 100644 --- a/hw/sh4/r2d.c +++ b/hw/sh4/r2d.c @@ -301,7 +301,7 @@ static void r2d_init(MachineState *machine) "rtl8139", i==0 ? "2" : NULL); /* USB keyboard */ - usbdevice_create("keyboard"); + usb_create_simple(usb_bus_find(-1), "usb-kbd"); /* Todo: register on board registers */ memset(&boot_params, 0, sizeof(boot_params)); diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 3ff5bd8871..4620cc613a 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -596,7 +596,8 @@ pci_ebus_init1(PCIDevice *pci_dev) { EbusState *s = DO_UPCAST(EbusState, pci_dev, pci_dev); - isa_bus_new(&pci_dev->qdev, pci_address_space_io(pci_dev)); + isa_bus_new(DEVICE(pci_dev), get_system_memory(), + pci_address_space_io(pci_dev)); pci_dev->config[0x04] = 0x06; // command = bus master, pci mem pci_dev->config[0x05] = 0x00; diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs index 3fe4dff3bd..0ccd477577 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs @@ -5,7 +5,8 @@ common-obj-y += libhw.o # usb host adapters common-obj-$(CONFIG_USB_UHCI) += hcd-uhci.o common-obj-$(CONFIG_USB_OHCI) += hcd-ohci.o -common-obj-$(CONFIG_USB_EHCI) += hcd-ehci.o hcd-ehci-pci.o hcd-ehci-sysbus.o +common-obj-$(CONFIG_USB_EHCI) += hcd-ehci.o hcd-ehci-pci.o +common-obj-$(CONFIG_USB_EHCI_SYSBUS) += hcd-ehci-sysbus.o common-obj-$(CONFIG_USB_XHCI) += hcd-xhci.o common-obj-$(CONFIG_USB_MUSB) += hcd-musb.o diff --git a/hw/usb/bus.c b/hw/usb/bus.c index 677122e8fd..91fc3e20d9 100644 --- a/hw/usb/bus.c +++ b/hw/usb/bus.c @@ -315,23 +315,33 @@ USBDevice *usb_create(USBBus *bus, const char *name) return USB_DEVICE(dev); } -USBDevice *usb_create_simple(USBBus *bus, const char *name) +static USBDevice *usb_try_create_simple(USBBus *bus, const char *name, + Error **errp) { - USBDevice *dev = usb_create(bus, name); - int rc; + Error *err = NULL; + USBDevice *dev; + dev = USB_DEVICE(qdev_try_create(&bus->qbus, name)); if (!dev) { - error_report("Failed to create USB device '%s'", name); + error_setg(errp, "Failed to create USB device '%s'", name); return NULL; } - rc = qdev_init(&dev->qdev); - if (rc < 0) { - error_report("Failed to initialize USB device '%s'", name); + object_property_set_bool(OBJECT(dev), true, "realized", &err); + if (err) { + error_setg(errp, "Failed to initialize USB device '%s': %s", + name, error_get_pretty(err)); + error_free(err); + object_unparent(OBJECT(dev)); return NULL; } return dev; } +USBDevice *usb_create_simple(USBBus *bus, const char *name) +{ + return usb_try_create_simple(bus, name, &error_abort); +} + static void usb_fill_port(USBPort *port, void *opaque, int index, USBPortOps *ops, int speedmask) { @@ -416,17 +426,17 @@ void usb_claim_port(USBDevice *dev, Error **errp) } } if (port == NULL) { - error_setg(errp, "Error: usb port %s (bus %s) not found (in use?)", + error_setg(errp, "usb port %s (bus %s) not found (in use?)", dev->port_path, bus->qbus.name); return; } } else { if (bus->nfree == 1 && strcmp(object_get_typename(OBJECT(dev)), "usb-hub") != 0) { /* Create a new hub and chain it on */ - usb_create_simple(bus, "usb-hub"); + usb_try_create_simple(bus, "usb-hub", NULL); } if (bus->nfree == 0) { - error_setg(errp, "Error: tried to attach usb device %s to a bus " + error_setg(errp, "tried to attach usb device %s to a bus " "with no free ports", dev->product_desc); return; } @@ -655,10 +665,12 @@ USBDevice *usbdevice_create(const char *cmdline) { USBBus *bus = usb_bus_find(-1 /* any */); LegacyUSBFactory *f = NULL; + Error *err = NULL; GSList *i; char driver[32]; const char *params; int len; + USBDevice *dev; params = strchr(cmdline,':'); if (params) { @@ -693,14 +705,28 @@ USBDevice *usbdevice_create(const char *cmdline) return NULL; } - if (!f->usbdevice_init) { + if (f->usbdevice_init) { + dev = f->usbdevice_init(bus, params); + } else { if (*params) { error_report("usbdevice %s accepts no params", driver); return NULL; } - return usb_create_simple(bus, f->name); + dev = usb_create(bus, f->name); + } + if (!dev) { + error_report("Failed to create USB device '%s'", f->name); + return NULL; } - return f->usbdevice_init(bus, params); + object_property_set_bool(OBJECT(dev), true, "realized", &err); + if (err) { + error_report("Failed to initialize USB device '%s': %s", + f->name, error_get_pretty(err)); + error_free(err); + object_unparent(OBJECT(dev)); + return NULL; + } + return dev; } static void usb_device_class_init(ObjectClass *klass, void *data) diff --git a/hw/usb/dev-bluetooth.c b/hw/usb/dev-bluetooth.c index 390d475c16..9bf673057a 100644 --- a/hw/usb/dev-bluetooth.c +++ b/hw/usb/dev-bluetooth.c @@ -530,21 +530,12 @@ static USBDevice *usb_bt_init(USBBus *bus, const char *cmdline) } else { hci = bt_new_hci(qemu_find_bt_vlan(0)); } - if (!hci) return NULL; + dev = usb_create(bus, name); - if (!dev) { - error_report("Failed to create USB device '%s'", name); - return NULL; - } s = DO_UPCAST(struct USBBtState, dev, dev); s->hci = hci; - if (qdev_init(&dev->qdev) < 0) { - error_report("Failed to initialize USB device '%s'", name); - return NULL; - } - return dev; } diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c index 5b95d5c382..7131abdb21 100644 --- a/hw/usb/dev-network.c +++ b/hw/usb/dev-network.c @@ -1405,11 +1405,7 @@ static USBDevice *usb_net_init(USBBus *bus, const char *cmdline) } dev = usb_create(bus, "usb-net"); - if (!dev) { - return NULL; - } qdev_set_nic_properties(&dev->qdev, &nd_table[idx]); - qdev_init_nofail(&dev->qdev); return dev; } diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c index 1cee450259..67c2072ce7 100644 --- a/hw/usb/dev-serial.c +++ b/hw/usb/dev-serial.c @@ -544,16 +544,11 @@ static USBDevice *usb_serial_init(USBBus *bus, const char *filename) return NULL; dev = usb_create(bus, "usb-serial"); - if (!dev) { - return NULL; - } qdev_prop_set_chr(&dev->qdev, "chardev", cdrv); if (vendorid) qdev_prop_set_uint16(&dev->qdev, "vendorid", vendorid); if (productid) qdev_prop_set_uint16(&dev->qdev, "productid", productid); - qdev_init_nofail(&dev->qdev); - return dev; } @@ -568,8 +563,6 @@ static USBDevice *usb_braille_init(USBBus *bus, const char *unused) dev = usb_create(bus, "usb-braille"); qdev_prop_set_chr(&dev->qdev, "chardev", cdrv); - qdev_init_nofail(&dev->qdev); - return dev; } diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index 4539733e42..af2e1b915d 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -706,17 +706,11 @@ static USBDevice *usb_msd_init(USBBus *bus, const char *filename) /* create guest device */ dev = usb_create(bus, "usb-storage"); - if (!dev) { - return NULL; - } if (qdev_prop_set_drive(&dev->qdev, "drive", blk_by_legacy_dinfo(dinfo)) < 0) { object_unparent(OBJECT(dev)); return NULL; } - if (qdev_init(&dev->qdev) < 0) - return NULL; - return dev; } diff --git a/hw/usb/host-legacy.c b/hw/usb/host-legacy.c index 3cc9c4282c..422ed9a65f 100644 --- a/hw/usb/host-legacy.c +++ b/hw/usb/host-legacy.c @@ -128,7 +128,6 @@ USBDevice *usb_host_device_open(USBBus *bus, const char *devname) qdev_prop_set_uint32(&dev->qdev, "hostaddr", filter.addr); qdev_prop_set_uint32(&dev->qdev, "vendorid", filter.vendor_id); qdev_prop_set_uint32(&dev->qdev, "productid", filter.product_id); - qdev_init_nofail(&dev->qdev); return dev; fail: diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index d21c397756..19b224a44d 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -2,7 +2,7 @@ common-obj-y += virtio-rng.o common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o common-obj-y += virtio-bus.o common-obj-y += virtio-mmio.o -common-obj-$(CONFIG_VIRTIO) += dataplane/ +obj-$(CONFIG_VIRTIO) += dataplane/ obj-y += virtio.o virtio-balloon.o obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o diff --git a/hw/virtio/dataplane/Makefile.objs b/hw/virtio/dataplane/Makefile.objs index 9a8cfc0297..753a9cab44 100644 --- a/hw/virtio/dataplane/Makefile.objs +++ b/hw/virtio/dataplane/Makefile.objs @@ -1 +1 @@ -common-obj-y += vring.o +obj-y += vring.o diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c index 78c6f45a07..0936f659e5 100644 --- a/hw/virtio/dataplane/vring.c +++ b/hw/virtio/dataplane/vring.c @@ -18,7 +18,9 @@ #include "hw/hw.h" #include "exec/memory.h" #include "exec/address-spaces.h" +#include "hw/virtio/virtio-access.h" #include "hw/virtio/dataplane/vring.h" +#include "hw/virtio/dataplane/vring-accessors.h" #include "qemu/error-report.h" /* vring_map can be coupled with vring_unmap or (if you still have the @@ -83,7 +85,7 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n) vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096); vring->last_avail_idx = virtio_queue_get_last_avail_idx(vdev, n); - vring->last_used_idx = vring->vr.used->idx; + vring->last_used_idx = vring_get_used_idx(vdev, vring); vring->signalled_used = 0; vring->signalled_used_valid = false; @@ -104,7 +106,7 @@ void vring_teardown(Vring *vring, VirtIODevice *vdev, int n) void vring_disable_notification(VirtIODevice *vdev, Vring *vring) { if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) { - vring->vr.used->flags |= VRING_USED_F_NO_NOTIFY; + vring_set_used_flags(vdev, vring, VRING_USED_F_NO_NOTIFY); } } @@ -117,10 +119,10 @@ bool vring_enable_notification(VirtIODevice *vdev, Vring *vring) if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) { vring_avail_event(&vring->vr) = vring->vr.avail->idx; } else { - vring->vr.used->flags &= ~VRING_USED_F_NO_NOTIFY; + vring_clear_used_flags(vdev, vring, VRING_USED_F_NO_NOTIFY); } smp_mb(); /* ensure update is seen before reading avail_idx */ - return !vring_more_avail(vring); + return !vring_more_avail(vdev, vring); } /* This is stolen from linux/drivers/vhost/vhost.c:vhost_notify() */ @@ -134,12 +136,13 @@ bool vring_should_notify(VirtIODevice *vdev, Vring *vring) smp_mb(); if ((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) && - unlikely(vring->vr.avail->idx == vring->last_avail_idx)) { + unlikely(!vring_more_avail(vdev, vring))) { return true; } if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) { - return !(vring->vr.avail->flags & VRING_AVAIL_F_NO_INTERRUPT); + return !(vring_get_avail_flags(vdev, vring) & + VRING_AVAIL_F_NO_INTERRUPT); } old = vring->signalled_used; v = vring->signalled_used_valid; @@ -202,9 +205,19 @@ static int get_desc(Vring *vring, VirtQueueElement *elem, return 0; } +static void copy_in_vring_desc(VirtIODevice *vdev, + const struct vring_desc *guest, + struct vring_desc *host) +{ + host->addr = virtio_ldq_p(vdev, &guest->addr); + host->len = virtio_ldl_p(vdev, &guest->len); + host->flags = virtio_lduw_p(vdev, &guest->flags); + host->next = virtio_lduw_p(vdev, &guest->next); +} + /* This is stolen from linux/drivers/vhost/vhost.c. */ -static int get_indirect(Vring *vring, VirtQueueElement *elem, - struct vring_desc *indirect) +static int get_indirect(VirtIODevice *vdev, Vring *vring, + VirtQueueElement *elem, struct vring_desc *indirect) { struct vring_desc desc; unsigned int i = 0, count, found = 0; @@ -244,7 +257,7 @@ static int get_indirect(Vring *vring, VirtQueueElement *elem, vring->broken = true; return -EFAULT; } - desc = *desc_ptr; + copy_in_vring_desc(vdev, desc_ptr, &desc); memory_region_unref(mr); /* Ensure descriptor has been loaded before accessing fields */ @@ -320,7 +333,7 @@ int vring_pop(VirtIODevice *vdev, Vring *vring, /* Check it isn't doing very strange things with descriptor numbers. */ last_avail_idx = vring->last_avail_idx; - avail_idx = vring->vr.avail->idx; + avail_idx = vring_get_avail_idx(vdev, vring); barrier(); /* load indices now and not again later */ if (unlikely((uint16_t)(avail_idx - last_avail_idx) > num)) { @@ -341,7 +354,7 @@ int vring_pop(VirtIODevice *vdev, Vring *vring, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - head = vring->vr.avail->ring[last_avail_idx % num]; + head = vring_get_avail_ring(vdev, vring, last_avail_idx % num); elem->index = head; @@ -365,13 +378,13 @@ int vring_pop(VirtIODevice *vdev, Vring *vring, ret = -EFAULT; goto out; } - desc = vring->vr.desc[i]; + copy_in_vring_desc(vdev, &vring->vr.desc[i], &desc); /* Ensure descriptor is loaded before accessing fields */ barrier(); if (desc.flags & VRING_DESC_F_INDIRECT) { - ret = get_indirect(vring, elem, &desc); + ret = get_indirect(vdev, vring, elem, &desc); if (ret < 0) { goto out; } @@ -407,9 +420,9 @@ out: * * Stolen from linux/drivers/vhost/vhost.c. */ -void vring_push(Vring *vring, VirtQueueElement *elem, int len) +void vring_push(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem, + int len) { - struct vring_used_elem *used; unsigned int head = elem->index; uint16_t new; @@ -422,14 +435,16 @@ void vring_push(Vring *vring, VirtQueueElement *elem, int len) /* The virtqueue contains a ring of used buffers. Get a pointer to the * next entry in that used ring. */ - used = &vring->vr.used->ring[vring->last_used_idx % vring->vr.num]; - used->id = head; - used->len = len; + vring_set_used_ring_id(vdev, vring, vring->last_used_idx % vring->vr.num, + head); + vring_set_used_ring_len(vdev, vring, vring->last_used_idx % vring->vr.num, + len); /* Make sure buffer is written before we update index. */ smp_wmb(); - new = vring->vr.used->idx = ++vring->last_used_idx; + new = ++vring->last_used_idx; + vring_set_used_idx(vdev, vring, new); if (unlikely((int16_t)(new - vring->signalled_used) < (uint16_t)1)) { vring->signalled_used_valid = false; } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index dde1d73b56..604cb5b749 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1238,6 +1238,8 @@ static void vhost_scsi_pci_instance_init(Object *obj) virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), TYPE_VHOST_SCSI); + object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex", &error_abort); } static const TypeInfo vhost_scsi_pci_info = { diff --git a/include/block/block.h b/include/block/block.h index 321295e5f7..471d11dbbe 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -168,7 +168,8 @@ void bdrv_io_limits_disable(BlockDriverState *bs); void bdrv_init(void); void bdrv_init_with_whitelist(void); BlockDriver *bdrv_find_protocol(const char *filename, - bool allow_protocol_prefix); + bool allow_protocol_prefix, + Error **errp); BlockDriver *bdrv_find_format(const char *format_name); BlockDriver *bdrv_find_whitelisted_format(const char *format_name, bool readonly); diff --git a/include/block/block_int.h b/include/block/block_int.h index 7ad19503df..b340e7e140 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -369,9 +369,6 @@ struct BlockDriverState { /* I/O Limits */ BlockLimits bl; - /* Whether the disk can expand beyond total_sectors */ - int growable; - /* Whether produces zeros when read beyond eof */ bool zero_beyond_eof; diff --git a/include/block/nbd.h b/include/block/nbd.h index b75959556c..ca9a5ac5b3 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -99,7 +99,6 @@ void nbd_export_close_all(void); NBDClient *nbd_client_new(NBDExport *exp, int csock, void (*close)(NBDClient *)); -void nbd_client_close(NBDClient *client); void nbd_client_get(NBDClient *client); void nbd_client_put(NBDClient *client); diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 2c4828694b..ac06c6721c 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -24,6 +24,7 @@ #include "exec/memory.h" #include "qemu/thread.h" #include "qom/cpu.h" +#include "qemu/rcu.h" /* some important defines: * @@ -268,6 +269,7 @@ CPUArchState *cpu_copy(CPUArchState *env); typedef struct RAMBlock RAMBlock; struct RAMBlock { + struct rcu_head rcu; struct MemoryRegion *mr; uint8_t *host; ram_addr_t offset; @@ -275,11 +277,10 @@ struct RAMBlock { ram_addr_t max_length; void (*resized)(const char*, uint64_t length, void *host); uint32_t flags; + /* Protected by iothread lock. */ char idstr[256]; - /* Reads can take either the iothread or the ramlist lock. - * Writes must take both locks. - */ - QTAILQ_ENTRY(RAMBlock) next; + /* RCU-enabled, writes protected by the ramlist lock */ + QLIST_ENTRY(RAMBlock) next; int fd; }; @@ -295,8 +296,8 @@ typedef struct RAMList { /* Protected by the iothread lock. */ unsigned long *dirty_memory[DIRTY_MEMORY_NUM]; RAMBlock *mru_block; - /* Protected by the ramlist lock. */ - QTAILQ_HEAD(, RAMBlock) blocks; + /* RCU-enabled, writes protected by the ramlist lock. */ + QLIST_HEAD(, RAMBlock) blocks; uint32_t version; } RAMList; extern RAMList ram_list; diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h index b8ecd6f68d..e0da9d7ad3 100644 --- a/include/exec/cputlb.h +++ b/include/exec/cputlb.h @@ -34,7 +34,7 @@ extern int tlb_flush_count; void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr); MemoryRegionSection * -address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat, +address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr, hwaddr *xlat, hwaddr *plen); hwaddr memory_region_section_get_iotlb(CPUState *cpu, MemoryRegionSection *section, diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 6a154485ba..8eb0db3910 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -96,6 +96,8 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access); #if !defined(CONFIG_USER_ONLY) +bool qemu_in_vcpu_thread(void); +void cpu_reload_memory_map(CPUState *cpu); void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as); /* cputlb.c */ void tlb_flush_page(CPUState *cpu, target_ulong addr); @@ -337,7 +339,8 @@ extern uintptr_t tci_tb_ptr; void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align)); -struct MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index); +struct MemoryRegion *iotlb_to_region(CPUState *cpu, + hwaddr index); bool io_mem_read(struct MemoryRegion *mr, hwaddr addr, uint64_t *pvalue, unsigned size); bool io_mem_write(struct MemoryRegion *mr, hwaddr addr, diff --git a/include/hw/isa/isa.h b/include/hw/isa/isa.h index e0c749f9e9..cf7bd343c7 100644 --- a/include/hw/isa/isa.h +++ b/include/hw/isa/isa.h @@ -36,6 +36,7 @@ struct ISABus { BusState parent_obj; /*< public >*/ + MemoryRegion *address_space; MemoryRegion *address_space_io; qemu_irq *irqs; }; @@ -50,7 +51,8 @@ struct ISADevice { int ioport_id; }; -ISABus *isa_bus_new(DeviceState *dev, MemoryRegion *address_space_io); +ISABus *isa_bus_new(DeviceState *dev, MemoryRegion *address_space, + MemoryRegion *address_space_io); void isa_bus_irqs(ISABus *bus, qemu_irq *irqs); qemu_irq isa_get_irq(ISADevice *dev, int isairq); void isa_init_irq(ISADevice *dev, qemu_irq *p, int isairq); @@ -97,8 +99,6 @@ static inline ISABus *isa_bus_from_device(ISADevice *d) return ISA_BUS(qdev_get_parent_bus(DEVICE(d))); } -extern hwaddr isa_mem_base; - /* dma.c */ int DMA_get_channel_mode (int nchan); int DMA_read_memory (int nchan, void *buf, int pos, int size); diff --git a/include/hw/pci/shpc.h b/include/hw/pci/shpc.h index 025bc5b268..9bbea39996 100644 --- a/include/hw/pci/shpc.h +++ b/include/hw/pci/shpc.h @@ -41,6 +41,7 @@ void shpc_reset(PCIDevice *d); int shpc_bar_size(PCIDevice *dev); int shpc_init(PCIDevice *dev, PCIBus *sec_bus, MemoryRegion *bar, unsigned off); void shpc_cleanup(PCIDevice *dev, MemoryRegion *bar); +void shpc_free(PCIDevice *dev); void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len); diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 15a226f24a..4e673f9d29 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -342,6 +342,7 @@ void qbus_reset_all_fn(void *opaque); BusState *sysbus_get_default(void); char *qdev_get_fw_dev_path(DeviceState *dev); +char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev); /** * @qdev_machine_init diff --git a/include/hw/virtio/dataplane/vring-accessors.h b/include/hw/virtio/dataplane/vring-accessors.h new file mode 100644 index 0000000000..b508b87900 --- /dev/null +++ b/include/hw/virtio/dataplane/vring-accessors.h @@ -0,0 +1,75 @@ +#ifndef VRING_ACCESSORS_H +#define VRING_ACCESSORS_H + +#include "hw/virtio/virtio_ring.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-access.h" + +static inline uint16_t vring_get_used_idx(VirtIODevice *vdev, Vring *vring) +{ + return virtio_tswap16(vdev, vring->vr.used->idx); +} + +static inline void vring_set_used_idx(VirtIODevice *vdev, Vring *vring, + uint16_t idx) +{ + vring->vr.used->idx = virtio_tswap16(vdev, idx); +} + +static inline uint16_t vring_get_avail_idx(VirtIODevice *vdev, Vring *vring) +{ + return virtio_tswap16(vdev, vring->vr.avail->idx); +} + +static inline uint16_t vring_get_avail_ring(VirtIODevice *vdev, Vring *vring, + int i) +{ + return virtio_tswap16(vdev, vring->vr.avail->ring[i]); +} + +static inline void vring_set_used_ring_id(VirtIODevice *vdev, Vring *vring, + int i, uint32_t id) +{ + vring->vr.used->ring[i].id = virtio_tswap32(vdev, id); +} + +static inline void vring_set_used_ring_len(VirtIODevice *vdev, Vring *vring, + int i, uint32_t len) +{ + vring->vr.used->ring[i].len = virtio_tswap32(vdev, len); +} + +static inline uint16_t vring_get_used_flags(VirtIODevice *vdev, Vring *vring) +{ + return virtio_tswap16(vdev, vring->vr.used->flags); +} + +static inline uint16_t vring_get_avail_flags(VirtIODevice *vdev, Vring *vring) +{ + return virtio_tswap16(vdev, vring->vr.avail->flags); +} + +static inline void vring_set_used_flags(VirtIODevice *vdev, Vring *vring, + uint16_t flags) +{ + vring->vr.used->flags |= virtio_tswap16(vdev, flags); +} + +static inline void vring_clear_used_flags(VirtIODevice *vdev, Vring *vring, + uint16_t flags) +{ + vring->vr.used->flags &= virtio_tswap16(vdev, ~flags); +} + +static inline unsigned int vring_get_num(Vring *vring) +{ + return vring->vr.num; +} + +/* Are there more descriptors available? */ +static inline bool vring_more_avail(VirtIODevice *vdev, Vring *vring) +{ + return vring_get_avail_idx(vdev, vring) != vring->last_avail_idx; +} + +#endif diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h index d3e086aefc..e42c0fc9cc 100644 --- a/include/hw/virtio/dataplane/vring.h +++ b/include/hw/virtio/dataplane/vring.h @@ -31,17 +31,6 @@ typedef struct { bool broken; /* was there a fatal error? */ } Vring; -static inline unsigned int vring_get_num(Vring *vring) -{ - return vring->vr.num; -} - -/* Are there more descriptors available? */ -static inline bool vring_more_avail(Vring *vring) -{ - return vring->vr.avail->idx != vring->last_avail_idx; -} - /* Fail future vring_pop() and vring_push() calls until reset */ static inline void vring_set_broken(Vring *vring) { @@ -54,6 +43,7 @@ void vring_disable_notification(VirtIODevice *vdev, Vring *vring); bool vring_enable_notification(VirtIODevice *vdev, Vring *vring); bool vring_should_notify(VirtIODevice *vdev, Vring *vring); int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem); -void vring_push(Vring *vring, VirtQueueElement *elem, int len); +void vring_push(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem, + int len); #endif /* VRING_H */ diff --git a/include/hw/virtio/vhost-scsi.h b/include/hw/virtio/vhost-scsi.h index 85cc031281..dea0075626 100644 --- a/include/hw/virtio/vhost-scsi.h +++ b/include/hw/virtio/vhost-scsi.h @@ -60,11 +60,16 @@ typedef struct VHostSCSI { Error *migration_blocker; struct vhost_dev dev; + int32_t bootindex; + int channel; + int target; + int lun; } VHostSCSI; #define DEFINE_VHOST_SCSI_PROPERTIES(_state, _conf_field) \ DEFINE_PROP_STRING("vhostfd", _state, _conf_field.vhostfd), \ DEFINE_PROP_STRING("wwpn", _state, _conf_field.wwpn), \ + DEFINE_PROP_UINT32("boot_tpgt", _state, _conf_field.boot_tpgt, 0), \ DEFINE_PROP_UINT32("num_queues", _state, _conf_field.num_queues, 1), \ DEFINE_PROP_UINT32("max_sectors", _state, _conf_field.max_sectors, 0xFFFF), \ DEFINE_PROP_UINT32("cmd_per_lun", _state, _conf_field.cmd_per_lun, 128) diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index bf17cc9ea5..c122e7ae5c 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -153,6 +153,7 @@ struct VirtIOSCSIConf { uint32_t cmd_per_lun; char *vhostfd; char *wwpn; + uint32_t boot_tpgt; IOThread *iothread; }; diff --git a/include/qemu-io.h b/include/qemu-io.h index 5d6006f73b..4d402b9b01 100644 --- a/include/qemu-io.h +++ b/include/qemu-io.h @@ -22,7 +22,7 @@ #define CMD_FLAG_GLOBAL ((int)0x80000000) /* don't iterate "args" */ -typedef int (*cfunc_t)(BlockDriverState *bs, int argc, char **argv); +typedef int (*cfunc_t)(BlockBackend *blk, int argc, char **argv); typedef void (*helpfunc_t)(void); typedef struct cmdinfo { @@ -40,7 +40,7 @@ typedef struct cmdinfo { extern bool qemuio_misalign; -bool qemuio_command(BlockDriverState *bs, const char *cmd); +bool qemuio_command(BlockBackend *blk, const char *cmd); void qemuio_add_command(const cmdinfo_t *ci); int qemuio_command_usage(const cmdinfo_t *ci); diff --git a/include/qemu/queue.h b/include/qemu/queue.h index c602797652..80941506ce 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -139,17 +139,6 @@ struct { \ (elm)->field.le_prev = &(head)->lh_first; \ } while (/*CONSTCOND*/0) -#define QLIST_INSERT_HEAD_RCU(head, elm, field) do { \ - (elm)->field.le_prev = &(head)->lh_first; \ - (elm)->field.le_next = (head)->lh_first; \ - smp_wmb(); /* fill elm before linking it */ \ - if ((head)->lh_first != NULL) { \ - (head)->lh_first->field.le_prev = &(elm)->field.le_next; \ - } \ - (head)->lh_first = (elm); \ - smp_wmb(); \ -} while (/* CONSTCOND*/0) - #define QLIST_REMOVE(elm, field) do { \ if ((elm)->field.le_next != NULL) \ (elm)->field.le_next->field.le_prev = \ diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index 068a279a79..506ab58eaf 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -140,6 +140,14 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func); }), \ (RCUCBFunc *)(func)) +#define g_free_rcu(obj, field) \ + call_rcu1(({ \ + char __attribute__((unused)) \ + offset_must_be_zero[-offsetof(typeof(*(obj)), field)]; \ + &(obj)->field; \ + }), \ + (RCUCBFunc *)g_free); + #ifdef __cplusplus } #endif diff --git a/include/qemu/rcu_queue.h b/include/qemu/rcu_queue.h new file mode 100644 index 0000000000..3aca7a57e3 --- /dev/null +++ b/include/qemu/rcu_queue.h @@ -0,0 +1,134 @@ +#ifndef QEMU_RCU_QUEUE_H +#define QEMU_RCU_QUEUE_H + +/* + * rcu_queue.h + * + * RCU-friendly versions of the queue.h primitives. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Copyright (c) 2013 Mike D. Day, IBM Corporation. + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include "qemu/queue.h" +#include "qemu/atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * List access methods. + */ +#define QLIST_EMPTY_RCU(head) (atomic_rcu_read(&(head)->lh_first) == NULL) +#define QLIST_FIRST_RCU(head) (atomic_rcu_read(&(head)->lh_first)) +#define QLIST_NEXT_RCU(elm, field) (atomic_rcu_read(&(elm)->field.le_next)) + +/* + * List functions. + */ + + +/* + * The difference between atomic_read/set and atomic_rcu_read/set + * is in the including of a read/write memory barrier to the volatile + * access. atomic_rcu_* macros include the memory barrier, the + * plain atomic macros do not. Therefore, it should be correct to + * issue a series of reads or writes to the same element using only + * the atomic_* macro, until the last read or write, which should be + * atomic_rcu_* to introduce a read or write memory barrier as + * appropriate. + */ + +/* Upon publication of the listelm->next value, list readers + * will see the new node when following next pointers from + * antecedent nodes, but may not see the new node when following + * prev pointers from subsequent nodes until after the RCU grace + * period expires. + * see linux/include/rculist.h __list_add_rcu(new, prev, next) + */ +#define QLIST_INSERT_AFTER_RCU(listelm, elm, field) do { \ + (elm)->field.le_next = (listelm)->field.le_next; \ + (elm)->field.le_prev = &(listelm)->field.le_next; \ + atomic_rcu_set(&(listelm)->field.le_next, (elm)); \ + if ((elm)->field.le_next != NULL) { \ + (elm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + } \ +} while (/*CONSTCOND*/0) + +/* Upon publication of the listelm->prev->next value, list + * readers will see the new element when following prev pointers + * from subsequent elements, but may not see the new element + * when following next pointers from antecedent elements + * until after the RCU grace period expires. + */ +#define QLIST_INSERT_BEFORE_RCU(listelm, elm, field) do { \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + (elm)->field.le_next = (listelm); \ + atomic_rcu_set((listelm)->field.le_prev, (elm)); \ + (listelm)->field.le_prev = &(elm)->field.le_next; \ +} while (/*CONSTCOND*/0) + +/* Upon publication of the head->first value, list readers + * will see the new element when following the head, but may + * not see the new element when following prev pointers from + * subsequent elements until after the RCU grace period has + * expired. + */ +#define QLIST_INSERT_HEAD_RCU(head, elm, field) do { \ + (elm)->field.le_prev = &(head)->lh_first; \ + (elm)->field.le_next = (head)->lh_first; \ + atomic_rcu_set((&(head)->lh_first), (elm)); \ + if ((elm)->field.le_next != NULL) { \ + (elm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + } \ +} while (/*CONSTCOND*/0) + + +/* prior to publication of the elm->prev->next value, some list + * readers may still see the removed element when following + * the antecedent's next pointer. + */ +#define QLIST_REMOVE_RCU(elm, field) do { \ + if ((elm)->field.le_next != NULL) { \ + (elm)->field.le_next->field.le_prev = \ + (elm)->field.le_prev; \ + } \ + *(elm)->field.le_prev = (elm)->field.le_next; \ +} while (/*CONSTCOND*/0) + +/* List traversal must occur within an RCU critical section. */ +#define QLIST_FOREACH_RCU(var, head, field) \ + for ((var) = atomic_rcu_read(&(head)->lh_first); \ + (var); \ + (var) = atomic_rcu_read(&(var)->field.le_next)) + +/* List traversal must occur within an RCU critical section. */ +#define QLIST_FOREACH_SAFE_RCU(var, head, field, next_var) \ + for ((var) = (atomic_rcu_read(&(head)->lh_first)); \ + (var) && \ + ((next_var) = atomic_rcu_read(&(var)->field.le_next), 1); \ + (var) = (next_var)) + +#ifdef __cplusplus +} +#endif +#endif /* QEMU_RCU_QUEUE.H */ diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 2098f1cb50..48fd6fb1d2 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -256,6 +256,7 @@ struct CPUState { sigjmp_buf jmp_env; AddressSpace *as; + struct AddressSpaceDispatch *memory_dispatch; MemoryListener *tcg_as_listener; void *env_ptr; /* CPUArchState */ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 33eb400534..3ff9aeeb09 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -62,6 +62,9 @@ typedef struct BlockDevOps { BlockBackend *blk_new(const char *name, Error **errp); BlockBackend *blk_new_with_bs(const char *name, Error **errp); +BlockBackend *blk_new_open(const char *name, const char *filename, + const char *reference, QDict *options, int flags, + Error **errp); void blk_ref(BlockBackend *blk); void blk_unref(BlockBackend *blk); const char *blk_name(BlockBackend *blk); @@ -91,6 +94,7 @@ int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count); int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count); int64_t blk_getlength(BlockBackend *blk); void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); +int64_t blk_nb_sectors(BlockBackend *blk); BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque); @@ -151,5 +155,14 @@ BlockAcctStats *blk_get_stats(BlockBackend *blk); void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); +int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags); +int blk_write_compressed(BlockBackend *blk, int64_t sector_num, + const uint8_t *buf, int nb_sectors); +int blk_truncate(BlockBackend *blk, int64_t offset); +int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors); +int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size); +int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); #endif diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index af3fbc47d8..9cc9e08139 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -81,7 +81,6 @@ struct tm *gmtime_r(const time_t *timep, struct tm *result); #undef localtime_r struct tm *localtime_r(const time_t *timep, struct tm *result); -char *strtok_r(char *str, const char *delim, char **saveptr); static inline void os_setup_signal_handling(void) {} static inline void os_daemonize(void) {} diff --git a/include/ui/console.h b/include/ui/console.h index de88bba478..0f97d86728 100644 --- a/include/ui/console.h +++ b/include/ui/console.h @@ -335,6 +335,7 @@ void vnc_display_init(const char *id); void vnc_display_open(const char *id, Error **errp); void vnc_display_add_client(const char *id, int csock, bool skipauth); char *vnc_display_local_addr(const char *id); +void vnc_auto_assign_id(QemuOptsList *olist, QemuOpts *opts); #ifdef CONFIG_VNC int vnc_display_password(const char *id, const char *password); int vnc_display_pw_expire(const char *id, time_t expires); diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 9ed6de88ec..edd5f3c80b 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -1607,73 +1607,25 @@ struct target_stat { #elif defined(TARGET_ABI_MIPSN32) struct target_stat { - unsigned st_dev; - int st_pad1[3]; /* Reserved for network id */ - unsigned int st_ino; - unsigned int st_mode; - unsigned int st_nlink; - int st_uid; - int st_gid; - unsigned st_rdev; - unsigned int st_pad2[2]; - unsigned int st_size; - unsigned int st_pad3; - /* - * Actually this should be timestruc_t st_atime, st_mtime and st_ctime - * but we don't have it under Linux. - */ - unsigned int target_st_atime; - unsigned int target_st_atime_nsec; - unsigned int target_st_mtime; - unsigned int target_st_mtime_nsec; - unsigned int target_st_ctime; - unsigned int target_st_ctime_nsec; - unsigned int st_blksize; - unsigned int st_blocks; - unsigned int st_pad4[14]; -}; - -/* - * This matches struct stat64 in glibc2.1, hence the absolutely insane - * amounts of padding around dev_t's. The memory layout is the same as of - * struct stat of the 64-bit kernel. - */ - -#define TARGET_HAS_STRUCT_STAT64 -struct target_stat64 { - unsigned int st_dev; - unsigned int st_pad0[3]; /* Reserved for st_dev expansion */ - - target_ulong st_ino; - - unsigned int st_mode; - unsigned int st_nlink; - - int st_uid; - int st_gid; - - unsigned int st_rdev; - unsigned int st_pad1[3]; /* Reserved for st_rdev expansion */ - - int st_size; - - /* - * Actually this should be timestruc_t st_atime, st_mtime and st_ctime - * but we don't have it under Linux. - */ - int target_st_atime; - unsigned int target_st_atime_nsec; /* Reserved for st_atime expansion */ - - int target_st_mtime; - unsigned int target_st_mtime_nsec; /* Reserved for st_mtime expansion */ - - int target_st_ctime; - unsigned int target_st_ctime_nsec; /* Reserved for st_ctime expansion */ - - unsigned int st_blksize; - unsigned int st_pad2; - - int st_blocks; + abi_ulong st_dev; + abi_ulong st_pad0[3]; /* Reserved for st_dev expansion */ + uint64_t st_ino; + unsigned int st_mode; + unsigned int st_nlink; + int st_uid; + int st_gid; + abi_ulong st_rdev; + abi_ulong st_pad1[3]; /* Reserved for st_rdev expansion */ + int64_t st_size; + abi_long target_st_atime; + abi_ulong target_st_atime_nsec; /* Reserved for st_atime expansion */ + abi_long target_st_mtime; + abi_ulong target_st_mtime_nsec; /* Reserved for st_mtime expansion */ + abi_long target_st_ctime; + abi_ulong target_st_ctime_nsec; /* Reserved for st_ctime expansion */ + abi_ulong st_blksize; + abi_ulong st_pad2; + int64_t st_blocks; }; #elif defined(TARGET_ABI_MIPSO32) @@ -1943,6 +1943,7 @@ void memory_listener_unregister(MemoryListener *listener) void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) { + memory_region_ref(root); memory_region_transaction_begin(); as->root = root; as->current_map = g_new(FlatView, 1); @@ -1969,10 +1970,13 @@ static void do_address_space_destroy(AddressSpace *as) flatview_unref(as->current_map); g_free(as->name); g_free(as->ioeventfds); + memory_region_unref(as->root); } void address_space_destroy(AddressSpace *as) { + MemoryRegion *root = as->root; + /* Flush out anything from MemoryListeners listening in on this */ memory_region_transaction_begin(); as->root = NULL; @@ -1984,6 +1988,7 @@ void address_space_destroy(AddressSpace *as) * entries that the guest should never use. Wait for the old * values to expire before freeing the data. */ + as->root = root; call_rcu(as, do_address_space_destroy, rcu); } @@ -874,7 +874,7 @@ void nbd_client_put(NBDClient *client) { if (--client->refcount == 0) { /* The last reference should be dropped by client->close, - * which is called by nbd_client_close. + * which is called by client_close. */ assert(client->closing); @@ -889,7 +889,7 @@ void nbd_client_put(NBDClient *client) } } -void nbd_client_close(NBDClient *client) +static void client_close(NBDClient *client) { if (client->closing) { return; @@ -1026,7 +1026,7 @@ void nbd_export_close(NBDExport *exp) nbd_export_get(exp); QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { - nbd_client_close(client); + client_close(client); } nbd_export_set_name(exp, NULL); nbd_export_put(exp); @@ -1311,7 +1311,7 @@ done: out: nbd_request_put(req); - nbd_client_close(client); + client_close(client); } static void nbd_read(void *opaque) diff --git a/qemu-img.c b/qemu-img.c index e148af8a3e..0155e36283 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -261,6 +261,7 @@ static int print_block_option_help(const char *filename, const char *fmt) { BlockDriver *drv, *proto_drv; QemuOptsList *create_opts = NULL; + Error *local_err = NULL; /* Find driver and parse its options */ drv = bdrv_find_format(fmt); @@ -271,9 +272,10 @@ static int print_block_option_help(const char *filename, const char *fmt) create_opts = qemu_opts_append(create_opts, drv->create_opts); if (filename) { - proto_drv = bdrv_find_protocol(filename, true); + proto_drv = bdrv_find_protocol(filename, true, &local_err); if (!proto_drv) { - error_report("Unknown protocol '%s'", filename); + qerror_report_err(local_err); + error_free(local_err); qemu_opts_free(create_opts); return 1; } @@ -291,32 +293,24 @@ static BlockBackend *img_open(const char *id, const char *filename, { BlockBackend *blk; BlockDriverState *bs; - BlockDriver *drv; char password[256]; Error *local_err = NULL; - int ret; - - blk = blk_new_with_bs(id, &error_abort); - bs = blk_bs(blk); + QDict *options = NULL; if (fmt) { - drv = bdrv_find_format(fmt); - if (!drv) { - error_report("Unknown file format '%s'", fmt); - goto fail; - } - } else { - drv = NULL; + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str(fmt)); } - ret = bdrv_open(&bs, filename, NULL, NULL, flags, drv, &local_err); - if (ret < 0) { + blk = blk_new_open(id, filename, NULL, options, flags, &local_err); + if (!blk) { error_report("Could not open '%s': %s", filename, error_get_pretty(local_err)); error_free(local_err); goto fail; } + bs = blk_bs(blk); if (bdrv_is_encrypted(bs) && require_io) { qprintf(quiet, "Disk image '%s' is encrypted.\n", filename); if (read_password(password, sizeof(password)) < 0) { @@ -1016,19 +1010,19 @@ static int64_t sectors_to_process(int64_t total, int64_t from) * Returns 0 in case sectors are filled with 0, 1 if sectors contain non-zero * data and negative value on error. * - * @param bs: Driver used for accessing file + * @param blk: BlockBackend for the image * @param sect_num: Number of first sector to check * @param sect_count: Number of sectors to check * @param filename: Name of disk file we are checking (logging purpose) * @param buffer: Allocated buffer for storing read data * @param quiet: Flag for quiet mode */ -static int check_empty_sectors(BlockDriverState *bs, int64_t sect_num, +static int check_empty_sectors(BlockBackend *blk, int64_t sect_num, int sect_count, const char *filename, uint8_t *buffer, bool quiet) { int pnum, ret = 0; - ret = bdrv_read(bs, sect_num, buffer, sect_count); + ret = blk_read(blk, sect_num, buffer, sect_count); if (ret < 0) { error_report("Error while reading offset %" PRId64 " of %s: %s", sectors_to_bytes(sect_num), filename, strerror(-ret)); @@ -1138,16 +1132,16 @@ static int img_compare(int argc, char **argv) } bs2 = blk_bs(blk2); - buf1 = qemu_blockalign(bs1, IO_BUF_SIZE); - buf2 = qemu_blockalign(bs2, IO_BUF_SIZE); - total_sectors1 = bdrv_nb_sectors(bs1); + buf1 = blk_blockalign(blk1, IO_BUF_SIZE); + buf2 = blk_blockalign(blk2, IO_BUF_SIZE); + total_sectors1 = blk_nb_sectors(blk1); if (total_sectors1 < 0) { error_report("Can't get size of %s: %s", filename1, strerror(-total_sectors1)); ret = 4; goto out; } - total_sectors2 = bdrv_nb_sectors(bs2); + total_sectors2 = blk_nb_sectors(blk2); if (total_sectors2 < 0) { error_report("Can't get size of %s: %s", filename2, strerror(-total_sectors2)); @@ -1189,7 +1183,7 @@ static int img_compare(int argc, char **argv) if (allocated1 == allocated2) { if (allocated1) { - ret = bdrv_read(bs1, sector_num, buf1, nb_sectors); + ret = blk_read(blk1, sector_num, buf1, nb_sectors); if (ret < 0) { error_report("Error while reading offset %" PRId64 " of %s:" " %s", sectors_to_bytes(sector_num), filename1, @@ -1197,7 +1191,7 @@ static int img_compare(int argc, char **argv) ret = 4; goto out; } - ret = bdrv_read(bs2, sector_num, buf2, nb_sectors); + ret = blk_read(blk2, sector_num, buf2, nb_sectors); if (ret < 0) { error_report("Error while reading offset %" PRId64 " of %s: %s", sectors_to_bytes(sector_num), @@ -1224,10 +1218,10 @@ static int img_compare(int argc, char **argv) } if (allocated1) { - ret = check_empty_sectors(bs1, sector_num, nb_sectors, + ret = check_empty_sectors(blk1, sector_num, nb_sectors, filename1, buf1, quiet); } else { - ret = check_empty_sectors(bs2, sector_num, nb_sectors, + ret = check_empty_sectors(blk2, sector_num, nb_sectors, filename2, buf1, quiet); } if (ret) { @@ -1244,18 +1238,18 @@ static int img_compare(int argc, char **argv) } if (total_sectors1 != total_sectors2) { - BlockDriverState *bs_over; + BlockBackend *blk_over; int64_t total_sectors_over; const char *filename_over; qprintf(quiet, "Warning: Image size mismatch!\n"); if (total_sectors1 > total_sectors2) { total_sectors_over = total_sectors1; - bs_over = bs1; + blk_over = blk1; filename_over = filename1; } else { total_sectors_over = total_sectors2; - bs_over = bs2; + blk_over = blk2; filename_over = filename2; } @@ -1264,7 +1258,7 @@ static int img_compare(int argc, char **argv) if (nb_sectors <= 0) { break; } - ret = bdrv_is_allocated_above(bs_over, NULL, sector_num, + ret = bdrv_is_allocated_above(blk_bs(blk_over), NULL, sector_num, nb_sectors, &pnum); if (ret < 0) { ret = 3; @@ -1275,7 +1269,7 @@ static int img_compare(int argc, char **argv) } nb_sectors = pnum; if (ret) { - ret = check_empty_sectors(bs_over, sector_num, nb_sectors, + ret = check_empty_sectors(blk_over, sector_num, nb_sectors, filename_over, buf1, quiet); if (ret) { if (ret < 0) { @@ -1484,7 +1478,7 @@ static int img_convert(int argc, char **argv) goto out; } bs[bs_i] = blk_bs(blk[bs_i]); - bs_sectors[bs_i] = bdrv_nb_sectors(bs[bs_i]); + bs_sectors[bs_i] = blk_nb_sectors(blk[bs_i]); if (bs_sectors[bs_i] < 0) { error_report("Could not get size of %s: %s", argv[optind + bs_i], strerror(-bs_sectors[bs_i])); @@ -1524,41 +1518,44 @@ static int img_convert(int argc, char **argv) goto out; } - proto_drv = bdrv_find_protocol(out_filename, true); + proto_drv = bdrv_find_protocol(out_filename, true, &local_err); if (!proto_drv) { - error_report("Unknown protocol '%s'", out_filename); + qerror_report_err(local_err); + error_free(local_err); ret = -1; goto out; } - if (!drv->create_opts) { - error_report("Format driver '%s' does not support image creation", - drv->format_name); - ret = -1; - goto out; - } + if (!skip_create) { + if (!drv->create_opts) { + error_report("Format driver '%s' does not support image creation", + drv->format_name); + ret = -1; + goto out; + } - if (!proto_drv->create_opts) { - error_report("Protocol driver '%s' does not support image creation", - proto_drv->format_name); - ret = -1; - goto out; - } + if (!proto_drv->create_opts) { + error_report("Protocol driver '%s' does not support image creation", + proto_drv->format_name); + ret = -1; + goto out; + } - create_opts = qemu_opts_append(create_opts, drv->create_opts); - create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); + create_opts = qemu_opts_append(create_opts, drv->create_opts); + create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); - opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); - if (options && qemu_opts_do_parse(opts, options, NULL)) { - error_report("Invalid options for file format '%s'", out_fmt); - ret = -1; - goto out; - } + opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); + if (options && qemu_opts_do_parse(opts, options, NULL)) { + error_report("Invalid options for file format '%s'", out_fmt); + ret = -1; + goto out; + } - qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_sectors * 512); - ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL); - if (ret < 0) { - goto out; + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_sectors * 512); + ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL); + if (ret < 0) { + goto out; + } } /* Get backing file name if -o backing_file was used */ @@ -1633,10 +1630,10 @@ static int img_convert(int argc, char **argv) out_bs->bl.discard_alignment)) ); - buf = qemu_blockalign(out_bs, bufsectors * BDRV_SECTOR_SIZE); + buf = blk_blockalign(out_blk, bufsectors * BDRV_SECTOR_SIZE); if (skip_create) { - int64_t output_sectors = bdrv_nb_sectors(out_bs); + int64_t output_sectors = blk_nb_sectors(out_blk); if (output_sectors < 0) { error_report("unable to get output image length: %s\n", strerror(-output_sectors)); @@ -1704,7 +1701,7 @@ static int img_convert(int argc, char **argv) nlow = remainder > bs_sectors[bs_i] - bs_num ? bs_sectors[bs_i] - bs_num : remainder; - ret = bdrv_read(bs[bs_i], bs_num, buf2, nlow); + ret = blk_read(blk[bs_i], bs_num, buf2, nlow); if (ret < 0) { error_report("error while reading sector %" PRId64 ": %s", bs_num, strerror(-ret)); @@ -1719,7 +1716,7 @@ static int img_convert(int argc, char **argv) assert (remainder == 0); if (!buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)) { - ret = bdrv_write_compressed(out_bs, sector_num, buf, n); + ret = blk_write_compressed(out_blk, sector_num, buf, n); if (ret != 0) { error_report("error while compressing sector %" PRId64 ": %s", sector_num, strerror(-ret)); @@ -1730,7 +1727,7 @@ static int img_convert(int argc, char **argv) qemu_progress_print(100.0 * sector_num / total_sectors, 0); } /* signal EOF to align */ - bdrv_write_compressed(out_bs, 0, NULL, 0); + blk_write_compressed(out_blk, 0, NULL, 0); } else { int64_t sectors_to_read, sectors_read, sector_num_next_status; bool count_allocated_sectors; @@ -1831,7 +1828,7 @@ restart: } n1 = n; - ret = bdrv_read(bs[bs_i], sector_num - bs_offset, buf, n); + ret = blk_read(blk[bs_i], sector_num - bs_offset, buf, n); if (ret < 0) { error_report("error while reading sector %" PRId64 ": %s", sector_num - bs_offset, strerror(-ret)); @@ -1844,7 +1841,7 @@ restart: while (n > 0) { if (!has_zero_init || is_allocated_sectors_min(buf1, n, &n1, min_sparse)) { - ret = bdrv_write(out_bs, sector_num, buf1, n1); + ret = blk_write(out_blk, sector_num, buf1, n1); if (ret < 0) { error_report("error while writing sector %" PRId64 ": %s", sector_num, strerror(-ret)); @@ -2268,7 +2265,7 @@ static int img_map(int argc, char **argv) printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File"); } - length = bdrv_getlength(bs); + length = blk_getlength(blk); while (curr.start + curr.length < length) { int64_t nsectors_left; int64_t sector_num; @@ -2437,8 +2434,7 @@ static int img_snapshot(int argc, char **argv) static int img_rebase(int argc, char **argv) { BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL; - BlockDriverState *bs = NULL, *bs_old_backing = NULL, *bs_new_backing = NULL; - BlockDriver *old_backing_drv, *new_backing_drv; + BlockDriverState *bs = NULL; char *filename; const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg; int c, flags, src_flags, ret; @@ -2532,22 +2528,8 @@ static int img_rebase(int argc, char **argv) } bs = blk_bs(blk); - /* Find the right drivers for the backing files */ - old_backing_drv = NULL; - new_backing_drv = NULL; - - if (!unsafe && bs->backing_format[0] != '\0') { - old_backing_drv = bdrv_find_format(bs->backing_format); - if (old_backing_drv == NULL) { - error_report("Invalid format name: '%s'", bs->backing_format); - ret = -1; - goto out; - } - } - if (out_basefmt != NULL) { - new_backing_drv = bdrv_find_format(out_basefmt); - if (new_backing_drv == NULL) { + if (bdrv_find_format(out_basefmt) == NULL) { error_report("Invalid format name: '%s'", out_basefmt); ret = -1; goto out; @@ -2557,24 +2539,34 @@ static int img_rebase(int argc, char **argv) /* For safe rebasing we need to compare old and new backing file */ if (!unsafe) { char backing_name[PATH_MAX]; + QDict *options = NULL; + + if (bs->backing_format[0] != '\0') { + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str(bs->backing_format)); + } - blk_old_backing = blk_new_with_bs("old_backing", &error_abort); - bs_old_backing = blk_bs(blk_old_backing); bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); - ret = bdrv_open(&bs_old_backing, backing_name, NULL, NULL, src_flags, - old_backing_drv, &local_err); - if (ret) { + blk_old_backing = blk_new_open("old_backing", backing_name, NULL, + options, src_flags, &local_err); + if (!blk_old_backing) { error_report("Could not open old backing file '%s': %s", backing_name, error_get_pretty(local_err)); error_free(local_err); goto out; } + if (out_baseimg[0]) { - blk_new_backing = blk_new_with_bs("new_backing", &error_abort); - bs_new_backing = blk_bs(blk_new_backing); - ret = bdrv_open(&bs_new_backing, out_baseimg, NULL, NULL, src_flags, - new_backing_drv, &local_err); - if (ret) { + if (out_basefmt) { + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str(out_basefmt)); + } else { + options = NULL; + } + + blk_new_backing = blk_new_open("new_backing", out_baseimg, NULL, + options, src_flags, &local_err); + if (!blk_new_backing) { error_report("Could not open new backing file '%s': %s", out_baseimg, error_get_pretty(local_err)); error_free(local_err); @@ -2602,17 +2594,17 @@ static int img_rebase(int argc, char **argv) uint8_t * buf_new; float local_progress = 0; - buf_old = qemu_blockalign(bs, IO_BUF_SIZE); - buf_new = qemu_blockalign(bs, IO_BUF_SIZE); + buf_old = blk_blockalign(blk, IO_BUF_SIZE); + buf_new = blk_blockalign(blk, IO_BUF_SIZE); - num_sectors = bdrv_nb_sectors(bs); + num_sectors = blk_nb_sectors(blk); if (num_sectors < 0) { error_report("Could not get size of '%s': %s", filename, strerror(-num_sectors)); ret = -1; goto out; } - old_backing_num_sectors = bdrv_nb_sectors(bs_old_backing); + old_backing_num_sectors = blk_nb_sectors(blk_old_backing); if (old_backing_num_sectors < 0) { char backing_name[PATH_MAX]; @@ -2622,8 +2614,8 @@ static int img_rebase(int argc, char **argv) ret = -1; goto out; } - if (bs_new_backing) { - new_backing_num_sectors = bdrv_nb_sectors(bs_new_backing); + if (blk_new_backing) { + new_backing_num_sectors = blk_nb_sectors(blk_new_backing); if (new_backing_num_sectors < 0) { error_report("Could not get size of '%s': %s", out_baseimg, strerror(-new_backing_num_sectors)); @@ -2668,21 +2660,21 @@ static int img_rebase(int argc, char **argv) n = old_backing_num_sectors - sector; } - ret = bdrv_read(bs_old_backing, sector, buf_old, n); + ret = blk_read(blk_old_backing, sector, buf_old, n); if (ret < 0) { error_report("error while reading from old backing file"); goto out; } } - if (sector >= new_backing_num_sectors || !bs_new_backing) { + if (sector >= new_backing_num_sectors || !blk_new_backing) { memset(buf_new, 0, n * BDRV_SECTOR_SIZE); } else { if (sector + n > new_backing_num_sectors) { n = new_backing_num_sectors - sector; } - ret = bdrv_read(bs_new_backing, sector, buf_new, n); + ret = blk_read(blk_new_backing, sector, buf_new, n); if (ret < 0) { error_report("error while reading from new backing file"); goto out; @@ -2698,8 +2690,8 @@ static int img_rebase(int argc, char **argv) if (compare_sectors(buf_old + written * 512, buf_new + written * 512, n - written, &pnum)) { - ret = bdrv_write(bs, sector + written, - buf_old + written * 512, pnum); + ret = blk_write(blk, sector + written, + buf_old + written * 512, pnum); if (ret < 0) { error_report("Error while writing to COW image: %s", strerror(-ret)); @@ -2764,7 +2756,6 @@ static int img_resize(int argc, char **argv) int64_t n, total_size; bool quiet = false; BlockBackend *blk = NULL; - BlockDriverState *bs = NULL; QemuOpts *param; static QemuOptsList resize_options = { .name = "resize_options", @@ -2846,10 +2837,9 @@ static int img_resize(int argc, char **argv) ret = -1; goto out; } - bs = blk_bs(blk); if (relative) { - total_size = bdrv_getlength(bs) + n * relative; + total_size = blk_getlength(blk) + n * relative; } else { total_size = n; } @@ -2859,7 +2849,7 @@ static int img_resize(int argc, char **argv) goto out; } - ret = bdrv_truncate(bs, total_size); + ret = blk_truncate(blk, total_size); switch (ret) { case 0: qprintf(quiet, "Image resized.\n"); diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index e70855254a..1afcfc01a5 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -9,10 +9,13 @@ */ #include "qemu-io.h" -#include "block/block_int.h" +#include "sysemu/block-backend.h" +#include "block/block.h" +#include "block/block_int.h" /* for info_f() */ #include "block/qapi.h" #include "qemu/main-loop.h" #include "qemu/timer.h" +#include "sysemu/block-backend.h" #define CMD_NOFILE_OK 0x01 @@ -40,24 +43,24 @@ int qemuio_command_usage(const cmdinfo_t *ci) return 0; } -static int init_check_command(BlockDriverState *bs, const cmdinfo_t *ct) +static int init_check_command(BlockBackend *blk, const cmdinfo_t *ct) { if (ct->flags & CMD_FLAG_GLOBAL) { return 1; } - if (!(ct->flags & CMD_NOFILE_OK) && !bs) { + if (!(ct->flags & CMD_NOFILE_OK) && !blk) { fprintf(stderr, "no file open, try 'help open'\n"); return 0; } return 1; } -static int command(BlockDriverState *bs, const cmdinfo_t *ct, int argc, +static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc, char **argv) { char *cmd = argv[0]; - if (!init_check_command(bs, ct)) { + if (!init_check_command(blk, ct)) { return 0; } @@ -78,7 +81,7 @@ static int command(BlockDriverState *bs, const cmdinfo_t *ct, int argc, return 0; } optind = 0; - return ct->cfunc(bs, argc, argv); + return ct->cfunc(blk, argc, argv); } static const cmdinfo_t *find_command(const char *cmd) @@ -267,14 +270,14 @@ static int parse_pattern(const char *arg) */ #define MISALIGN_OFFSET 16 -static void *qemu_io_alloc(BlockDriverState *bs, size_t len, int pattern) +static void *qemu_io_alloc(BlockBackend *blk, size_t len, int pattern) { void *buf; if (qemuio_misalign) { len += MISALIGN_OFFSET; } - buf = qemu_blockalign(bs, len); + buf = blk_blockalign(blk, len); memset(buf, pattern, len); if (qemuio_misalign) { buf += MISALIGN_OFFSET; @@ -340,7 +343,7 @@ static void print_report(const char *op, struct timeval *t, int64_t offset, * vector matching it. */ static void * -create_iovec(BlockDriverState *bs, QEMUIOVector *qiov, char **argv, int nr_iov, +create_iovec(BlockBackend *blk, QEMUIOVector *qiov, char **argv, int nr_iov, int pattern) { size_t *sizes = g_new0(size_t, nr_iov); @@ -377,7 +380,7 @@ create_iovec(BlockDriverState *bs, QEMUIOVector *qiov, char **argv, int nr_iov, qemu_iovec_init(qiov, nr_iov); - buf = p = qemu_io_alloc(bs, count, pattern); + buf = p = qemu_io_alloc(blk, count, pattern); for (i = 0; i < nr_iov; i++) { qemu_iovec_add(qiov, p, sizes[i]); @@ -389,12 +392,12 @@ fail: return buf; } -static int do_read(BlockDriverState *bs, char *buf, int64_t offset, int count, +static int do_read(BlockBackend *blk, char *buf, int64_t offset, int count, int *total) { int ret; - ret = bdrv_read(bs, offset >> 9, (uint8_t *)buf, count >> 9); + ret = blk_read(blk, offset >> 9, (uint8_t *)buf, count >> 9); if (ret < 0) { return ret; } @@ -402,12 +405,12 @@ static int do_read(BlockDriverState *bs, char *buf, int64_t offset, int count, return 1; } -static int do_write(BlockDriverState *bs, char *buf, int64_t offset, int count, +static int do_write(BlockBackend *blk, char *buf, int64_t offset, int count, int *total) { int ret; - ret = bdrv_write(bs, offset >> 9, (uint8_t *)buf, count >> 9); + ret = blk_write(blk, offset >> 9, (uint8_t *)buf, count >> 9); if (ret < 0) { return ret; } @@ -415,20 +418,20 @@ static int do_write(BlockDriverState *bs, char *buf, int64_t offset, int count, return 1; } -static int do_pread(BlockDriverState *bs, char *buf, int64_t offset, int count, +static int do_pread(BlockBackend *blk, char *buf, int64_t offset, int count, int *total) { - *total = bdrv_pread(bs, offset, (uint8_t *)buf, count); + *total = blk_pread(blk, offset, (uint8_t *)buf, count); if (*total < 0) { return *total; } return 1; } -static int do_pwrite(BlockDriverState *bs, char *buf, int64_t offset, int count, +static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset, int count, int *total) { - *total = bdrv_pwrite(bs, offset, (uint8_t *)buf, count); + *total = blk_pwrite(blk, offset, (uint8_t *)buf, count); if (*total < 0) { return *total; } @@ -436,7 +439,7 @@ static int do_pwrite(BlockDriverState *bs, char *buf, int64_t offset, int count, } typedef struct { - BlockDriverState *bs; + BlockBackend *blk; int64_t offset; int count; int *total; @@ -448,8 +451,8 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque) { CoWriteZeroes *data = opaque; - data->ret = bdrv_co_write_zeroes(data->bs, data->offset / BDRV_SECTOR_SIZE, - data->count / BDRV_SECTOR_SIZE, 0); + data->ret = blk_co_write_zeroes(data->blk, data->offset / BDRV_SECTOR_SIZE, + data->count / BDRV_SECTOR_SIZE, 0); data->done = true; if (data->ret < 0) { *data->total = data->ret; @@ -459,12 +462,12 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque) *data->total = data->count; } -static int do_co_write_zeroes(BlockDriverState *bs, int64_t offset, int count, +static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int count, int *total) { Coroutine *co; CoWriteZeroes data = { - .bs = bs, + .blk = blk, .offset = offset, .count = count, .total = total, @@ -474,7 +477,7 @@ static int do_co_write_zeroes(BlockDriverState *bs, int64_t offset, int count, co = qemu_coroutine_create(co_write_zeroes_entry); qemu_coroutine_enter(co, &data); while (!data.done) { - aio_poll(bdrv_get_aio_context(bs), true); + aio_poll(blk_get_aio_context(blk), true); } if (data.ret < 0) { return data.ret; @@ -483,12 +486,12 @@ static int do_co_write_zeroes(BlockDriverState *bs, int64_t offset, int count, } } -static int do_write_compressed(BlockDriverState *bs, char *buf, int64_t offset, +static int do_write_compressed(BlockBackend *blk, char *buf, int64_t offset, int count, int *total) { int ret; - ret = bdrv_write_compressed(bs, offset >> 9, (uint8_t *)buf, count >> 9); + ret = blk_write_compressed(blk, offset >> 9, (uint8_t *)buf, count >> 9); if (ret < 0) { return ret; } @@ -496,20 +499,20 @@ static int do_write_compressed(BlockDriverState *bs, char *buf, int64_t offset, return 1; } -static int do_load_vmstate(BlockDriverState *bs, char *buf, int64_t offset, +static int do_load_vmstate(BlockBackend *blk, char *buf, int64_t offset, int count, int *total) { - *total = bdrv_load_vmstate(bs, (uint8_t *)buf, offset, count); + *total = blk_load_vmstate(blk, (uint8_t *)buf, offset, count); if (*total < 0) { return *total; } return 1; } -static int do_save_vmstate(BlockDriverState *bs, char *buf, int64_t offset, +static int do_save_vmstate(BlockBackend *blk, char *buf, int64_t offset, int count, int *total) { - *total = bdrv_save_vmstate(bs, (uint8_t *)buf, offset, count); + *total = blk_save_vmstate(blk, (uint8_t *)buf, offset, count); if (*total < 0) { return *total; } @@ -522,13 +525,13 @@ static void aio_rw_done(void *opaque, int ret) *(int *)opaque = ret; } -static int do_aio_readv(BlockDriverState *bs, QEMUIOVector *qiov, +static int do_aio_readv(BlockBackend *blk, QEMUIOVector *qiov, int64_t offset, int *total) { int async_ret = NOT_DONE; - bdrv_aio_readv(bs, offset >> 9, qiov, qiov->size >> 9, - aio_rw_done, &async_ret); + blk_aio_readv(blk, offset >> 9, qiov, qiov->size >> 9, + aio_rw_done, &async_ret); while (async_ret == NOT_DONE) { main_loop_wait(false); } @@ -537,13 +540,13 @@ static int do_aio_readv(BlockDriverState *bs, QEMUIOVector *qiov, return async_ret < 0 ? async_ret : 1; } -static int do_aio_writev(BlockDriverState *bs, QEMUIOVector *qiov, +static int do_aio_writev(BlockBackend *blk, QEMUIOVector *qiov, int64_t offset, int *total) { int async_ret = NOT_DONE; - bdrv_aio_writev(bs, offset >> 9, qiov, qiov->size >> 9, - aio_rw_done, &async_ret); + blk_aio_writev(blk, offset >> 9, qiov, qiov->size >> 9, + aio_rw_done, &async_ret); while (async_ret == NOT_DONE) { main_loop_wait(false); } @@ -567,7 +570,7 @@ static void multiwrite_cb(void *opaque, int ret) } } -static int do_aio_multiwrite(BlockDriverState *bs, BlockRequest* reqs, +static int do_aio_multiwrite(BlockBackend *blk, BlockRequest* reqs, int num_reqs, int *total) { int i, ret; @@ -583,7 +586,7 @@ static int do_aio_multiwrite(BlockDriverState *bs, BlockRequest* reqs, *total += reqs[i].qiov->size; } - ret = bdrv_aio_multiwrite(bs, reqs, num_reqs); + ret = blk_aio_multiwrite(blk, reqs, num_reqs); if (ret < 0) { return ret; } @@ -609,7 +612,7 @@ static void read_help(void) " -b, -- read from the VM state rather than the virtual disk\n" " -C, -- report statistics in a machine parsable format\n" " -l, -- length for pattern verification (only with -P)\n" -" -p, -- use bdrv_pread to read the file\n" +" -p, -- use blk_pread to read the file\n" " -P, -- use a pattern to verify read data\n" " -q, -- quiet mode, do not show I/O statistics\n" " -s, -- start offset for pattern verification (only with -P)\n" @@ -617,7 +620,7 @@ static void read_help(void) "\n"); } -static int read_f(BlockDriverState *bs, int argc, char **argv); +static int read_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t read_cmd = { .name = "read", @@ -630,7 +633,7 @@ static const cmdinfo_t read_cmd = { .help = read_help, }; -static int read_f(BlockDriverState *bs, int argc, char **argv) +static int read_f(BlockBackend *blk, int argc, char **argv) { struct timeval t1, t2; int Cflag = 0, pflag = 0, qflag = 0, vflag = 0; @@ -736,15 +739,15 @@ static int read_f(BlockDriverState *bs, int argc, char **argv) } } - buf = qemu_io_alloc(bs, count, 0xab); + buf = qemu_io_alloc(blk, count, 0xab); gettimeofday(&t1, NULL); if (pflag) { - cnt = do_pread(bs, buf, offset, count, &total); + cnt = do_pread(blk, buf, offset, count, &total); } else if (bflag) { - cnt = do_load_vmstate(bs, buf, offset, count, &total); + cnt = do_load_vmstate(blk, buf, offset, count, &total); } else { - cnt = do_read(bs, buf, offset, count, &total); + cnt = do_read(blk, buf, offset, count, &total); } gettimeofday(&t2, NULL); @@ -801,7 +804,7 @@ static void readv_help(void) "\n"); } -static int readv_f(BlockDriverState *bs, int argc, char **argv); +static int readv_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t readv_cmd = { .name = "readv", @@ -813,7 +816,7 @@ static const cmdinfo_t readv_cmd = { .help = readv_help, }; -static int readv_f(BlockDriverState *bs, int argc, char **argv) +static int readv_f(BlockBackend *blk, int argc, char **argv) { struct timeval t1, t2; int Cflag = 0, qflag = 0, vflag = 0; @@ -869,13 +872,13 @@ static int readv_f(BlockDriverState *bs, int argc, char **argv) } nr_iov = argc - optind; - buf = create_iovec(bs, &qiov, &argv[optind], nr_iov, 0xab); + buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, 0xab); if (buf == NULL) { return 0; } gettimeofday(&t1, NULL); - cnt = do_aio_readv(bs, &qiov, offset, &total); + cnt = do_aio_readv(blk, &qiov, offset, &total); gettimeofday(&t2, NULL); if (cnt < 0) { @@ -923,16 +926,16 @@ static void write_help(void) " Writes into a segment of the currently open file, using a buffer\n" " filled with a set pattern (0xcdcdcdcd).\n" " -b, -- write to the VM state rather than the virtual disk\n" -" -c, -- write compressed data with bdrv_write_compressed\n" -" -p, -- use bdrv_pwrite to write the file\n" +" -c, -- write compressed data with blk_write_compressed\n" +" -p, -- use blk_pwrite to write the file\n" " -P, -- use different pattern to fill file\n" " -C, -- report statistics in a machine parsable format\n" " -q, -- quiet mode, do not show I/O statistics\n" -" -z, -- write zeroes using bdrv_co_write_zeroes\n" +" -z, -- write zeroes using blk_co_write_zeroes\n" "\n"); } -static int write_f(BlockDriverState *bs, int argc, char **argv); +static int write_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t write_cmd = { .name = "write", @@ -945,7 +948,7 @@ static const cmdinfo_t write_cmd = { .help = write_help, }; -static int write_f(BlockDriverState *bs, int argc, char **argv) +static int write_f(BlockBackend *blk, int argc, char **argv) { struct timeval t1, t2; int Cflag = 0, pflag = 0, qflag = 0, bflag = 0, Pflag = 0, zflag = 0; @@ -1032,20 +1035,20 @@ static int write_f(BlockDriverState *bs, int argc, char **argv) } if (!zflag) { - buf = qemu_io_alloc(bs, count, pattern); + buf = qemu_io_alloc(blk, count, pattern); } gettimeofday(&t1, NULL); if (pflag) { - cnt = do_pwrite(bs, buf, offset, count, &total); + cnt = do_pwrite(blk, buf, offset, count, &total); } else if (bflag) { - cnt = do_save_vmstate(bs, buf, offset, count, &total); + cnt = do_save_vmstate(blk, buf, offset, count, &total); } else if (zflag) { - cnt = do_co_write_zeroes(bs, offset, count, &total); + cnt = do_co_write_zeroes(blk, offset, count, &total); } else if (cflag) { - cnt = do_write_compressed(bs, buf, offset, count, &total); + cnt = do_write_compressed(blk, buf, offset, count, &total); } else { - cnt = do_write(bs, buf, offset, count, &total); + cnt = do_write(blk, buf, offset, count, &total); } gettimeofday(&t2, NULL); @@ -1088,7 +1091,7 @@ writev_help(void) "\n"); } -static int writev_f(BlockDriverState *bs, int argc, char **argv); +static int writev_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t writev_cmd = { .name = "writev", @@ -1100,7 +1103,7 @@ static const cmdinfo_t writev_cmd = { .help = writev_help, }; -static int writev_f(BlockDriverState *bs, int argc, char **argv) +static int writev_f(BlockBackend *blk, int argc, char **argv) { struct timeval t1, t2; int Cflag = 0, qflag = 0; @@ -1150,13 +1153,13 @@ static int writev_f(BlockDriverState *bs, int argc, char **argv) } nr_iov = argc - optind; - buf = create_iovec(bs, &qiov, &argv[optind], nr_iov, pattern); + buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, pattern); if (buf == NULL) { return 0; } gettimeofday(&t1, NULL); - cnt = do_aio_writev(bs, &qiov, offset, &total); + cnt = do_aio_writev(blk, &qiov, offset, &total); gettimeofday(&t2, NULL); if (cnt < 0) { @@ -1197,7 +1200,7 @@ static void multiwrite_help(void) "\n"); } -static int multiwrite_f(BlockDriverState *bs, int argc, char **argv); +static int multiwrite_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t multiwrite_cmd = { .name = "multiwrite", @@ -1209,7 +1212,7 @@ static const cmdinfo_t multiwrite_cmd = { .help = multiwrite_help, }; -static int multiwrite_f(BlockDriverState *bs, int argc, char **argv) +static int multiwrite_f(BlockBackend *blk, int argc, char **argv) { struct timeval t1, t2; int Cflag = 0, qflag = 0; @@ -1290,7 +1293,7 @@ static int multiwrite_f(BlockDriverState *bs, int argc, char **argv) nr_iov = j - optind; /* Build request */ - buf[i] = create_iovec(bs, &qiovs[i], &argv[optind], nr_iov, pattern); + buf[i] = create_iovec(blk, &qiovs[i], &argv[optind], nr_iov, pattern); if (buf[i] == NULL) { goto out; } @@ -1308,7 +1311,7 @@ static int multiwrite_f(BlockDriverState *bs, int argc, char **argv) nr_reqs = i; gettimeofday(&t1, NULL); - cnt = do_aio_multiwrite(bs, reqs, nr_reqs, &total); + cnt = do_aio_multiwrite(blk, reqs, nr_reqs, &total); gettimeofday(&t2, NULL); if (cnt < 0) { @@ -1337,6 +1340,7 @@ out: } struct aio_ctx { + BlockBackend *blk; QEMUIOVector qiov; int64_t offset; char *buf; @@ -1344,6 +1348,7 @@ struct aio_ctx { int vflag; int Cflag; int Pflag; + BlockAcctCookie acct; int pattern; struct timeval t1; }; @@ -1361,6 +1366,8 @@ static void aio_write_done(void *opaque, int ret) goto out; } + block_acct_done(blk_get_stats(ctx->blk), &ctx->acct); + if (ctx->qflag) { goto out; } @@ -1398,6 +1405,8 @@ static void aio_read_done(void *opaque, int ret) g_free(cmp_buf); } + block_acct_done(blk_get_stats(ctx->blk), &ctx->acct); + if (ctx->qflag) { goto out; } @@ -1436,7 +1445,7 @@ static void aio_read_help(void) "\n"); } -static int aio_read_f(BlockDriverState *bs, int argc, char **argv); +static int aio_read_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t aio_read_cmd = { .name = "aio_read", @@ -1448,11 +1457,12 @@ static const cmdinfo_t aio_read_cmd = { .help = aio_read_help, }; -static int aio_read_f(BlockDriverState *bs, int argc, char **argv) +static int aio_read_f(BlockBackend *blk, int argc, char **argv) { int nr_iov, c; struct aio_ctx *ctx = g_new0(struct aio_ctx, 1); + ctx->blk = blk; while ((c = getopt(argc, argv, "CP:qv")) != EOF) { switch (c) { case 'C': @@ -1499,15 +1509,17 @@ static int aio_read_f(BlockDriverState *bs, int argc, char **argv) } nr_iov = argc - optind; - ctx->buf = create_iovec(bs, &ctx->qiov, &argv[optind], nr_iov, 0xab); + ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, 0xab); if (ctx->buf == NULL) { g_free(ctx); return 0; } gettimeofday(&ctx->t1, NULL); - bdrv_aio_readv(bs, ctx->offset >> 9, &ctx->qiov, - ctx->qiov.size >> 9, aio_read_done, ctx); + block_acct_start(blk_get_stats(blk), &ctx->acct, ctx->qiov.size, + BLOCK_ACCT_READ); + blk_aio_readv(blk, ctx->offset >> 9, &ctx->qiov, + ctx->qiov.size >> 9, aio_read_done, ctx); return 0; } @@ -1531,7 +1543,7 @@ static void aio_write_help(void) "\n"); } -static int aio_write_f(BlockDriverState *bs, int argc, char **argv); +static int aio_write_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t aio_write_cmd = { .name = "aio_write", @@ -1543,12 +1555,13 @@ static const cmdinfo_t aio_write_cmd = { .help = aio_write_help, }; -static int aio_write_f(BlockDriverState *bs, int argc, char **argv) +static int aio_write_f(BlockBackend *blk, int argc, char **argv) { int nr_iov, c; int pattern = 0xcd; struct aio_ctx *ctx = g_new0(struct aio_ctx, 1); + ctx->blk = blk; while ((c = getopt(argc, argv, "CqP:")) != EOF) { switch (c) { case 'C': @@ -1591,21 +1604,23 @@ static int aio_write_f(BlockDriverState *bs, int argc, char **argv) } nr_iov = argc - optind; - ctx->buf = create_iovec(bs, &ctx->qiov, &argv[optind], nr_iov, pattern); + ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, pattern); if (ctx->buf == NULL) { g_free(ctx); return 0; } gettimeofday(&ctx->t1, NULL); - bdrv_aio_writev(bs, ctx->offset >> 9, &ctx->qiov, - ctx->qiov.size >> 9, aio_write_done, ctx); + block_acct_start(blk_get_stats(blk), &ctx->acct, ctx->qiov.size, + BLOCK_ACCT_WRITE); + blk_aio_writev(blk, ctx->offset >> 9, &ctx->qiov, + ctx->qiov.size >> 9, aio_write_done, ctx); return 0; } -static int aio_flush_f(BlockDriverState *bs, int argc, char **argv) +static int aio_flush_f(BlockBackend *blk, int argc, char **argv) { - bdrv_drain_all(); + blk_drain_all(); return 0; } @@ -1615,9 +1630,9 @@ static const cmdinfo_t aio_flush_cmd = { .oneline = "completes all outstanding aio requests" }; -static int flush_f(BlockDriverState *bs, int argc, char **argv) +static int flush_f(BlockBackend *blk, int argc, char **argv) { - bdrv_flush(bs); + blk_flush(blk); return 0; } @@ -1628,7 +1643,7 @@ static const cmdinfo_t flush_cmd = { .oneline = "flush all in-core file state to disk", }; -static int truncate_f(BlockDriverState *bs, int argc, char **argv) +static int truncate_f(BlockBackend *blk, int argc, char **argv) { int64_t offset; int ret; @@ -1639,7 +1654,7 @@ static int truncate_f(BlockDriverState *bs, int argc, char **argv) return 0; } - ret = bdrv_truncate(bs, offset); + ret = blk_truncate(blk, offset); if (ret < 0) { printf("truncate: %s\n", strerror(-ret)); return 0; @@ -1658,12 +1673,12 @@ static const cmdinfo_t truncate_cmd = { .oneline = "truncates the current file at the given offset", }; -static int length_f(BlockDriverState *bs, int argc, char **argv) +static int length_f(BlockBackend *blk, int argc, char **argv) { int64_t size; char s1[64]; - size = bdrv_getlength(bs); + size = blk_getlength(blk); if (size < 0) { printf("getlength: %s\n", strerror(-size)); return 0; @@ -1683,8 +1698,9 @@ static const cmdinfo_t length_cmd = { }; -static int info_f(BlockDriverState *bs, int argc, char **argv) +static int info_f(BlockBackend *blk, int argc, char **argv) { + BlockDriverState *bs = blk_bs(blk); BlockDriverInfo bdi; ImageInfoSpecific *spec_info; char s1[64], s2[64]; @@ -1742,7 +1758,7 @@ static void discard_help(void) "\n"); } -static int discard_f(BlockDriverState *bs, int argc, char **argv); +static int discard_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t discard_cmd = { .name = "discard", @@ -1755,7 +1771,7 @@ static const cmdinfo_t discard_cmd = { .help = discard_help, }; -static int discard_f(BlockDriverState *bs, int argc, char **argv) +static int discard_f(BlockBackend *blk, int argc, char **argv) { struct timeval t1, t2; int Cflag = 0, qflag = 0; @@ -1794,8 +1810,8 @@ static int discard_f(BlockDriverState *bs, int argc, char **argv) } gettimeofday(&t1, NULL); - ret = bdrv_discard(bs, offset >> BDRV_SECTOR_BITS, - count >> BDRV_SECTOR_BITS); + ret = blk_discard(blk, offset >> BDRV_SECTOR_BITS, + count >> BDRV_SECTOR_BITS); gettimeofday(&t2, NULL); if (ret < 0) { @@ -1813,8 +1829,9 @@ out: return 0; } -static int alloc_f(BlockDriverState *bs, int argc, char **argv) +static int alloc_f(BlockBackend *blk, int argc, char **argv) { + BlockDriverState *bs = blk_bs(blk); int64_t offset, sector_num; int nb_sectors, remaining; char s1[64]; @@ -1910,20 +1927,27 @@ static int map_is_allocated(BlockDriverState *bs, int64_t sector_num, return firstret; } -static int map_f(BlockDriverState *bs, int argc, char **argv) +static int map_f(BlockBackend *blk, int argc, char **argv) { int64_t offset; - int64_t nb_sectors; + int64_t nb_sectors, total_sectors; char s1[64]; int64_t num; int ret; const char *retstr; offset = 0; - nb_sectors = bs->total_sectors; + total_sectors = blk_nb_sectors(blk); + if (total_sectors < 0) { + error_report("Failed to query image length: %s", + strerror(-total_sectors)); + return 0; + } + + nb_sectors = total_sectors; do { - ret = map_is_allocated(bs, offset, nb_sectors, &num); + ret = map_is_allocated(blk_bs(blk), offset, nb_sectors, &num); if (ret < 0) { error_report("Failed to get allocation status: %s", strerror(-ret)); return 0; @@ -1940,7 +1964,7 @@ static int map_f(BlockDriverState *bs, int argc, char **argv) offset += num; nb_sectors -= num; - } while (offset < bs->total_sectors); + } while (offset < total_sectors); return 0; } @@ -1954,11 +1978,11 @@ static const cmdinfo_t map_cmd = { .oneline = "prints the allocated areas of a file", }; -static int break_f(BlockDriverState *bs, int argc, char **argv) +static int break_f(BlockBackend *blk, int argc, char **argv) { int ret; - ret = bdrv_debug_breakpoint(bs, argv[1], argv[2]); + ret = bdrv_debug_breakpoint(blk_bs(blk), argv[1], argv[2]); if (ret < 0) { printf("Could not set breakpoint: %s\n", strerror(-ret)); } @@ -1966,11 +1990,11 @@ static int break_f(BlockDriverState *bs, int argc, char **argv) return 0; } -static int remove_break_f(BlockDriverState *bs, int argc, char **argv) +static int remove_break_f(BlockBackend *blk, int argc, char **argv) { int ret; - ret = bdrv_debug_remove_breakpoint(bs, argv[1]); + ret = bdrv_debug_remove_breakpoint(blk_bs(blk), argv[1]); if (ret < 0) { printf("Could not remove breakpoint %s: %s\n", argv[1], strerror(-ret)); } @@ -1997,11 +2021,11 @@ static const cmdinfo_t remove_break_cmd = { .oneline = "remove a breakpoint by tag", }; -static int resume_f(BlockDriverState *bs, int argc, char **argv) +static int resume_f(BlockBackend *blk, int argc, char **argv) { int ret; - ret = bdrv_debug_resume(bs, argv[1]); + ret = bdrv_debug_resume(blk_bs(blk), argv[1]); if (ret < 0) { printf("Could not resume request: %s\n", strerror(-ret)); } @@ -2018,10 +2042,10 @@ static const cmdinfo_t resume_cmd = { .oneline = "resumes the request tagged as tag", }; -static int wait_break_f(BlockDriverState *bs, int argc, char **argv) +static int wait_break_f(BlockBackend *blk, int argc, char **argv) { - while (!bdrv_debug_is_suspended(bs, argv[1])) { - aio_poll(bdrv_get_aio_context(bs), true); + while (!bdrv_debug_is_suspended(blk_bs(blk), argv[1])) { + aio_poll(blk_get_aio_context(blk), true); } return 0; @@ -2036,7 +2060,7 @@ static const cmdinfo_t wait_break_cmd = { .oneline = "waits for the suspension of a request", }; -static int abort_f(BlockDriverState *bs, int argc, char **argv) +static int abort_f(BlockBackend *blk, int argc, char **argv) { abort(); } @@ -2062,7 +2086,7 @@ static void sigraise_help(void) "\n", SIGTERM); } -static int sigraise_f(BlockDriverState *bs, int argc, char **argv); +static int sigraise_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t sigraise_cmd = { .name = "sigraise", @@ -2075,7 +2099,7 @@ static const cmdinfo_t sigraise_cmd = { .help = sigraise_help, }; -static int sigraise_f(BlockDriverState *bs, int argc, char **argv) +static int sigraise_f(BlockBackend *blk, int argc, char **argv) { int sig = cvtnum(argv[1]); if (sig < 0) { @@ -2099,7 +2123,7 @@ static void sleep_cb(void *opaque) *expired = true; } -static int sleep_f(BlockDriverState *bs, int argc, char **argv) +static int sleep_f(BlockBackend *blk, int argc, char **argv) { char *endptr; long ms; @@ -2168,7 +2192,7 @@ static void help_all(void) printf("\nUse 'help commandname' for extended help.\n"); } -static int help_f(BlockDriverState *bs, int argc, char **argv) +static int help_f(BlockBackend *blk, int argc, char **argv) { const cmdinfo_t *ct; @@ -2198,7 +2222,7 @@ static const cmdinfo_t help_cmd = { .oneline = "help for one or all commands", }; -bool qemuio_command(BlockDriverState *bs, const char *cmd) +bool qemuio_command(BlockBackend *blk, const char *cmd) { char *input; const cmdinfo_t *ct; @@ -2211,7 +2235,7 @@ bool qemuio_command(BlockDriverState *bs, const char *cmd) if (c) { ct = find_command(v[0]); if (ct) { - done = command(bs, ct, c, v); + done = command(blk, ct, c, v); } else { fprintf(stderr, "command \"%s\" not found\n", v[0]); } @@ -28,7 +28,6 @@ static char *progname; static BlockBackend *qemuio_blk; -static BlockDriverState *qemuio_bs; /* qemu-io commands passed using -c */ static int ncmdline; @@ -36,10 +35,9 @@ static char **cmdline; static ReadLineState *readline_state; -static int close_f(BlockDriverState *bs, int argc, char **argv) +static int close_f(BlockBackend *blk, int argc, char **argv) { blk_unref(qemuio_blk); - qemuio_bs = NULL; qemuio_blk = NULL; return 0; } @@ -51,32 +49,22 @@ static const cmdinfo_t close_cmd = { .oneline = "close the current open file", }; -static int openfile(char *name, BlockDriver *drv, int flags, int growable, - QDict *opts) +static int openfile(char *name, int flags, QDict *opts) { Error *local_err = NULL; - if (qemuio_bs) { + if (qemuio_blk) { fprintf(stderr, "file open already, try 'help close'\n"); QDECREF(opts); return 1; } - qemuio_blk = blk_new_with_bs("hda", &error_abort); - qemuio_bs = blk_bs(qemuio_blk); - - if (growable) { - flags |= BDRV_O_PROTOCOL; - } - - if (bdrv_open(&qemuio_bs, name, NULL, opts, flags, drv, &local_err) < 0) { + qemuio_blk = blk_new_open("hda", name, NULL, opts, flags, &local_err); + if (!qemuio_blk) { fprintf(stderr, "%s: can't open%s%s: %s\n", progname, name ? " device " : "", name ?: "", error_get_pretty(local_err)); error_free(local_err); - blk_unref(qemuio_blk); - qemuio_bs = NULL; - qemuio_blk = NULL; return 1; } @@ -96,12 +84,11 @@ static void open_help(void) " -r, -- open file read-only\n" " -s, -- use snapshot file\n" " -n, -- disable host cache\n" -" -g, -- allow file to grow (only applies to protocols)\n" " -o, -- options to be given to the block driver" "\n"); } -static int open_f(BlockDriverState *bs, int argc, char **argv); +static int open_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t open_cmd = { .name = "open", @@ -125,11 +112,10 @@ static QemuOptsList empty_opts = { }, }; -static int open_f(BlockDriverState *bs, int argc, char **argv) +static int open_f(BlockBackend *blk, int argc, char **argv) { int flags = 0; int readonly = 0; - int growable = 0; int c; QemuOpts *qopts; QDict *opts; @@ -145,9 +131,6 @@ static int open_f(BlockDriverState *bs, int argc, char **argv) case 'r': readonly = 1; break; - case 'g': - growable = 1; - break; case 'o': if (!qemu_opts_parse(&empty_opts, optarg, 0)) { printf("could not parse option list -- %s\n", optarg); @@ -170,16 +153,16 @@ static int open_f(BlockDriverState *bs, int argc, char **argv) qemu_opts_reset(&empty_opts); if (optind == argc - 1) { - return openfile(argv[optind], NULL, flags, growable, opts); + return openfile(argv[optind], flags, opts); } else if (optind == argc) { - return openfile(NULL, NULL, flags, growable, opts); + return openfile(NULL, flags, opts); } else { QDECREF(opts); return qemuio_command_usage(&open_cmd); } } -static int quit_f(BlockDriverState *bs, int argc, char **argv) +static int quit_f(BlockBackend *blk, int argc, char **argv) { return 1; } @@ -206,7 +189,6 @@ static void usage(const char *name) " -r, --read-only export read-only\n" " -s, --snapshot use snapshot file\n" " -n, --nocache disable host cache\n" -" -g, --growable allow file to grow (only applies to protocols)\n" " -m, --misalign misalign allocations for O_DIRECT\n" " -k, --native-aio use kernel AIO implementation (on Linux only)\n" " -t, --cache=MODE use the given cache mode for the image\n" @@ -317,7 +299,7 @@ static void command_loop(void) char *input; for (i = 0; !done && i < ncmdline; i++) { - done = qemuio_command(qemuio_bs, cmdline[i]); + done = qemuio_command(qemuio_blk, cmdline[i]); } if (cmdline) { g_free(cmdline); @@ -342,7 +324,7 @@ static void command_loop(void) if (input == NULL) { break; } - done = qemuio_command(qemuio_bs, input); + done = qemuio_command(qemuio_blk, input); g_free(input); prompted = 0; @@ -365,7 +347,6 @@ static void reenable_tty_echo(void) int main(int argc, char **argv) { int readonly = 0; - int growable = 0; const char *sopt = "hVc:d:f:rsnmgkt:T:"; const struct option lopt[] = { { "help", 0, NULL, 'h' }, @@ -377,7 +358,6 @@ int main(int argc, char **argv) { "snapshot", 0, NULL, 's' }, { "nocache", 0, NULL, 'n' }, { "misalign", 0, NULL, 'm' }, - { "growable", 0, NULL, 'g' }, { "native-aio", 0, NULL, 'k' }, { "discard", 1, NULL, 'd' }, { "cache", 1, NULL, 't' }, @@ -387,8 +367,8 @@ int main(int argc, char **argv) int c; int opt_index = 0; int flags = BDRV_O_UNMAP; - BlockDriver *drv = NULL; Error *local_error = NULL; + QDict *opts = NULL; #ifdef CONFIG_POSIX signal(SIGPIPE, SIG_IGN); @@ -414,11 +394,10 @@ int main(int argc, char **argv) } break; case 'f': - drv = bdrv_find_format(optarg); - if (!drv) { - error_report("Invalid format '%s'", optarg); - exit(EXIT_FAILURE); + if (!opts) { + opts = qdict_new(); } + qdict_put(opts, "driver", qstring_from_str(optarg)); break; case 'c': add_user_command(optarg); @@ -429,9 +408,6 @@ int main(int argc, char **argv) case 'm': qemuio_misalign = true; break; - case 'g': - growable = 1; - break; case 'k': flags |= BDRV_O_NATIVE_AIO; break; @@ -489,7 +465,7 @@ int main(int argc, char **argv) } if ((argc - optind) == 1) { - openfile(argv[optind], drv, flags, growable, NULL); + openfile(argv[optind], flags, opts); } command_loop(); diff --git a/qemu-nbd.c b/qemu-nbd.c index 4d8df08385..ac1e5d6f77 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -391,7 +391,6 @@ int main(int argc, char **argv) { BlockBackend *blk; BlockDriverState *bs; - BlockDriver *drv; off_t dev_offset = 0; uint32_t nbdflags = 0; bool disconnect = false; @@ -434,7 +433,7 @@ int main(int argc, char **argv) char *end; int flags = BDRV_O_RDWR; int partition = -1; - int ret; + int ret = 0; int fd; bool seen_cache = false; bool seen_discard = false; @@ -445,6 +444,7 @@ int main(int argc, char **argv) const char *fmt = NULL; Error *local_err = NULL; BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF; + QDict *options = NULL; /* The client thread uses SIGTERM to interrupt the server. A signal * handler ensures that "qemu-nbd -v -c" exits with a nice status code. @@ -689,24 +689,17 @@ int main(int argc, char **argv) atexit(bdrv_close_all); if (fmt) { - drv = bdrv_find_format(fmt); - if (!drv) { - errx(EXIT_FAILURE, "Unknown file format '%s'", fmt); - } - } else { - drv = NULL; + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str(fmt)); } - blk = blk_new_with_bs("hda", &error_abort); - bs = blk_bs(blk); - srcpath = argv[optind]; - ret = bdrv_open(&bs, srcpath, NULL, NULL, flags, drv, &local_err); - if (ret < 0) { - errno = -ret; - err(EXIT_FAILURE, "Failed to bdrv_open '%s': %s", argv[optind], - error_get_pretty(local_err)); + blk = blk_new_open("hda", srcpath, NULL, options, flags, &local_err); + if (!blk) { + errx(EXIT_FAILURE, "Failed to blk_new_open '%s': %s", argv[optind], + error_get_pretty(local_err)); } + bs = blk_bs(blk); if (sn_opts) { ret = bdrv_snapshot_load_tmp(bs, diff --git a/qga/commands-posix.c b/qga/commands-posix.c index f6f3e3cd8e..d5bb5cb5da 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -376,13 +376,33 @@ safe_open_or_create(const char *path, const char *mode, Error **errp) return NULL; } +static int guest_file_toggle_flags(int fd, int flags, bool set, Error **err) +{ + int ret, old_flags; + + old_flags = fcntl(fd, F_GETFL); + if (old_flags == -1) { + error_set_errno(err, errno, QERR_QGA_COMMAND_FAILED, + "failed to fetch filehandle flags"); + return -1; + } + + ret = fcntl(fd, F_SETFL, set ? (old_flags | flags) : (old_flags & ~flags)); + if (ret == -1) { + error_set_errno(err, errno, QERR_QGA_COMMAND_FAILED, + "failed to set filehandle flags"); + return -1; + } + + return ret; +} + int64_t qmp_guest_file_open(const char *path, bool has_mode, const char *mode, Error **errp) { FILE *fh; Error *local_err = NULL; - int fd; - int64_t ret = -1, handle; + int64_t handle; if (!has_mode) { mode = "r"; @@ -397,12 +417,7 @@ int64_t qmp_guest_file_open(const char *path, bool has_mode, const char *mode, /* set fd non-blocking to avoid common use cases (like reading from a * named pipe) from hanging the agent */ - fd = fileno(fh); - ret = fcntl(fd, F_GETFL); - ret = fcntl(fd, F_SETFL, ret | O_NONBLOCK); - if (ret == -1) { - error_setg_errno(errp, errno, "failed to make file '%s' non-blocking", - path); + if (guest_file_toggle_flags(fileno(fh), O_NONBLOCK, true, errp) < 0) { fclose(fh); return -1; } @@ -1875,6 +1890,413 @@ int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp) return processed; } +void qmp_guest_set_user_password(const char *username, + const char *password, + bool crypted, + Error **errp) +{ + Error *local_err = NULL; + char *passwd_path = NULL; + pid_t pid; + int status; + int datafd[2] = { -1, -1 }; + char *rawpasswddata = NULL; + size_t rawpasswdlen; + char *chpasswddata = NULL; + size_t chpasswdlen; + + rawpasswddata = (char *)g_base64_decode(password, &rawpasswdlen); + rawpasswddata = g_renew(char, rawpasswddata, rawpasswdlen + 1); + rawpasswddata[rawpasswdlen] = '\0'; + + if (strchr(rawpasswddata, '\n')) { + error_setg(errp, "forbidden characters in raw password"); + goto out; + } + + if (strchr(username, '\n') || + strchr(username, ':')) { + error_setg(errp, "forbidden characters in username"); + goto out; + } + + chpasswddata = g_strdup_printf("%s:%s\n", username, rawpasswddata); + chpasswdlen = strlen(chpasswddata); + + passwd_path = g_find_program_in_path("chpasswd"); + + if (!passwd_path) { + error_setg(errp, "cannot find 'passwd' program in PATH"); + goto out; + } + + if (pipe(datafd) < 0) { + error_setg(errp, "cannot create pipe FDs"); + goto out; + } + + pid = fork(); + if (pid == 0) { + close(datafd[1]); + /* child */ + setsid(); + dup2(datafd[0], 0); + reopen_fd_to_null(1); + reopen_fd_to_null(2); + + if (crypted) { + execle(passwd_path, "chpasswd", "-e", NULL, environ); + } else { + execle(passwd_path, "chpasswd", NULL, environ); + } + _exit(EXIT_FAILURE); + } else if (pid < 0) { + error_setg_errno(errp, errno, "failed to create child process"); + goto out; + } + close(datafd[0]); + datafd[0] = -1; + + if (qemu_write_full(datafd[1], chpasswddata, chpasswdlen) != chpasswdlen) { + error_setg_errno(errp, errno, "cannot write new account password"); + goto out; + } + close(datafd[1]); + datafd[1] = -1; + + ga_wait_child(pid, &status, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + + if (!WIFEXITED(status)) { + error_setg(errp, "child process has terminated abnormally"); + goto out; + } + + if (WEXITSTATUS(status)) { + error_setg(errp, "child process has failed to set user password"); + goto out; + } + +out: + g_free(chpasswddata); + g_free(rawpasswddata); + g_free(passwd_path); + if (datafd[0] != -1) { + close(datafd[0]); + } + if (datafd[1] != -1) { + close(datafd[1]); + } +} + +static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf, + int size, Error **errp) +{ + int fd; + int res; + + errno = 0; + fd = openat(dirfd, pathname, O_RDONLY); + if (fd == -1) { + error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname); + return; + } + + res = pread(fd, buf, size, 0); + if (res == -1) { + error_setg_errno(errp, errno, "pread sysfs file \"%s\"", pathname); + } else if (res == 0) { + error_setg(errp, "pread sysfs file \"%s\": unexpected EOF", pathname); + } + close(fd); +} + +static void ga_write_sysfs_file(int dirfd, const char *pathname, + const char *buf, int size, Error **errp) +{ + int fd; + + errno = 0; + fd = openat(dirfd, pathname, O_WRONLY); + if (fd == -1) { + error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname); + return; + } + + if (pwrite(fd, buf, size, 0) == -1) { + error_setg_errno(errp, errno, "pwrite sysfs file \"%s\"", pathname); + } + + close(fd); +} + +/* Transfer online/offline status between @mem_blk and the guest system. + * + * On input either @errp or *@errp must be NULL. + * + * In system-to-@mem_blk direction, the following @mem_blk fields are accessed: + * - R: mem_blk->phys_index + * - W: mem_blk->online + * - W: mem_blk->can_offline + * + * In @mem_blk-to-system direction, the following @mem_blk fields are accessed: + * - R: mem_blk->phys_index + * - R: mem_blk->online + *- R: mem_blk->can_offline + * Written members remain unmodified on error. + */ +static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk, + GuestMemoryBlockResponse *result, + Error **errp) +{ + char *dirpath; + int dirfd; + char *status; + Error *local_err = NULL; + + if (!sys2memblk) { + DIR *dp; + + if (!result) { + error_setg(errp, "Internal error, 'result' should not be NULL"); + return; + } + errno = 0; + dp = opendir("/sys/devices/system/memory/"); + /* if there is no 'memory' directory in sysfs, + * we think this VM does not support online/offline memory block, + * any other solution? + */ + if (!dp && errno == ENOENT) { + result->response = + GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED; + goto out1; + } + closedir(dp); + } + + dirpath = g_strdup_printf("/sys/devices/system/memory/memory%" PRId64 "/", + mem_blk->phys_index); + dirfd = open(dirpath, O_RDONLY | O_DIRECTORY); + if (dirfd == -1) { + if (sys2memblk) { + error_setg_errno(errp, errno, "open(\"%s\")", dirpath); + } else { + if (errno == ENOENT) { + result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_NOT_FOUND; + } else { + result->response = + GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED; + } + } + g_free(dirpath); + goto out1; + } + g_free(dirpath); + + status = g_malloc0(10); + ga_read_sysfs_file(dirfd, "state", status, 10, &local_err); + if (local_err) { + /* treat with sysfs file that not exist in old kernel */ + if (errno == ENOENT) { + error_free(local_err); + if (sys2memblk) { + mem_blk->online = true; + mem_blk->can_offline = false; + } else if (!mem_blk->online) { + result->response = + GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED; + } + } else { + if (sys2memblk) { + error_propagate(errp, local_err); + } else { + result->response = + GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED; + } + } + goto out2; + } + + if (sys2memblk) { + char removable = '0'; + + mem_blk->online = (strncmp(status, "online", 6) == 0); + + ga_read_sysfs_file(dirfd, "removable", &removable, 1, &local_err); + if (local_err) { + /* if no 'removable' file, it does't support offline mem blk */ + if (errno == ENOENT) { + error_free(local_err); + mem_blk->can_offline = false; + } else { + error_propagate(errp, local_err); + } + } else { + mem_blk->can_offline = (removable != '0'); + } + } else { + if (mem_blk->online != (strncmp(status, "online", 6) == 0)) { + char *new_state = mem_blk->online ? g_strdup("online") : + g_strdup("offline"); + + ga_write_sysfs_file(dirfd, "state", new_state, strlen(new_state), + &local_err); + g_free(new_state); + if (local_err) { + error_free(local_err); + result->response = + GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED; + goto out2; + } + + result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_SUCCESS; + result->has_error_code = false; + } /* otherwise pretend successful re-(on|off)-lining */ + } + g_free(status); + close(dirfd); + return; + +out2: + g_free(status); + close(dirfd); +out1: + if (!sys2memblk) { + result->has_error_code = true; + result->error_code = errno; + } +} + +GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp) +{ + GuestMemoryBlockList *head, **link; + Error *local_err = NULL; + struct dirent *de; + DIR *dp; + + head = NULL; + link = &head; + + dp = opendir("/sys/devices/system/memory/"); + if (!dp) { + error_setg_errno(errp, errno, "Can't open directory" + "\"/sys/devices/system/memory/\"\n"); + return NULL; + } + + /* Note: the phys_index of memory block may be discontinuous, + * this is because a memblk is the unit of the Sparse Memory design, which + * allows discontinuous memory ranges (ex. NUMA), so here we should + * traverse the memory block directory. + */ + while ((de = readdir(dp)) != NULL) { + GuestMemoryBlock *mem_blk; + GuestMemoryBlockList *entry; + + if ((strncmp(de->d_name, "memory", 6) != 0) || + !(de->d_type & DT_DIR)) { + continue; + } + + mem_blk = g_malloc0(sizeof *mem_blk); + /* The d_name is "memoryXXX", phys_index is block id, same as XXX */ + mem_blk->phys_index = strtoul(&de->d_name[6], NULL, 10); + mem_blk->has_can_offline = true; /* lolspeak ftw */ + transfer_memory_block(mem_blk, true, NULL, &local_err); + + entry = g_malloc0(sizeof *entry); + entry->value = mem_blk; + + *link = entry; + link = &entry->next; + } + + closedir(dp); + if (local_err == NULL) { + /* there's no guest with zero memory blocks */ + if (head == NULL) { + error_setg(errp, "guest reported zero memory blocks!"); + } + return head; + } + + qapi_free_GuestMemoryBlockList(head); + error_propagate(errp, local_err); + return NULL; +} + +GuestMemoryBlockResponseList * +qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp) +{ + GuestMemoryBlockResponseList *head, **link; + Error *local_err = NULL; + + head = NULL; + link = &head; + + while (mem_blks != NULL) { + GuestMemoryBlockResponse *result; + GuestMemoryBlockResponseList *entry; + GuestMemoryBlock *current_mem_blk = mem_blks->value; + + result = g_malloc0(sizeof(*result)); + result->phys_index = current_mem_blk->phys_index; + transfer_memory_block(current_mem_blk, false, result, &local_err); + if (local_err) { /* should never happen */ + goto err; + } + entry = g_malloc0(sizeof *entry); + entry->value = result; + + *link = entry; + link = &entry->next; + mem_blks = mem_blks->next; + } + + return head; +err: + qapi_free_GuestMemoryBlockResponseList(head); + error_propagate(errp, local_err); + return NULL; +} + +GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp) +{ + Error *local_err = NULL; + char *dirpath; + int dirfd; + char *buf; + GuestMemoryBlockInfo *info; + + dirpath = g_strdup_printf("/sys/devices/system/memory/"); + dirfd = open(dirpath, O_RDONLY | O_DIRECTORY); + if (dirfd == -1) { + error_setg_errno(errp, errno, "open(\"%s\")", dirpath); + g_free(dirpath); + return NULL; + } + g_free(dirpath); + + buf = g_malloc0(20); + ga_read_sysfs_file(dirfd, "block_size_bytes", buf, 20, &local_err); + if (local_err) { + g_free(buf); + error_propagate(errp, local_err); + return NULL; + } + + info = g_new0(GuestMemoryBlockInfo, 1); + info->size = strtol(buf, NULL, 16); /* the unit is bytes */ + + g_free(buf); + + return info; +} + #else /* defined(__linux__) */ void qmp_guest_suspend_disk(Error **errp) @@ -1910,6 +2332,33 @@ int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp) return -1; } +void qmp_guest_set_user_password(const char *username, + const char *password, + bool crypted, + Error **errp) +{ + error_set(errp, QERR_UNSUPPORTED); +} + +GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp) +{ + error_set(errp, QERR_UNSUPPORTED); + return NULL; +} + +GuestMemoryBlockResponseList * +qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp) +{ + error_set(errp, QERR_UNSUPPORTED); + return NULL; +} + +GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp) +{ + error_set(errp, QERR_UNSUPPORTED); + return NULL; +} + #endif #if !defined(CONFIG_FSFREEZE) @@ -1966,7 +2415,9 @@ GList *ga_command_blacklist_init(GList *blacklist) const char *list[] = { "guest-suspend-disk", "guest-suspend-ram", "guest-suspend-hybrid", "guest-network-get-interfaces", - "guest-get-vcpus", "guest-set-vcpus", NULL}; + "guest-get-vcpus", "guest-set-vcpus", + "guest-get-memory-blocks", "guest-set-memory-blocks", + "guest-get-memory-block-size", NULL}; char **p = (char **)list; while (*p) { diff --git a/qga/commands-win32.c b/qga/commands-win32.c index 3bcbeae8ff..3ef0549c0f 100644 --- a/qga/commands-win32.c +++ b/qga/commands-win32.c @@ -14,10 +14,13 @@ #include <glib.h> #include <wtypes.h> #include <powrprof.h> +#include <stdio.h> +#include <string.h> #include "qga/guest-agent-core.h" #include "qga/vss-win32.h" #include "qga-qmp-commands.h" #include "qapi/qmp/qerror.h" +#include "qemu/queue.h" #ifndef SHTDN_REASON_FLAG_PLANNED #define SHTDN_REASON_FLAG_PLANNED 0x80000000 @@ -29,6 +32,146 @@ (365 * (1970 - 1601) + \ (1970 - 1601) / 4 - 3)) +#define INVALID_SET_FILE_POINTER ((DWORD)-1) + +typedef struct GuestFileHandle { + int64_t id; + HANDLE fh; + QTAILQ_ENTRY(GuestFileHandle) next; +} GuestFileHandle; + +static struct { + QTAILQ_HEAD(, GuestFileHandle) filehandles; +} guest_file_state; + + +typedef struct OpenFlags { + const char *forms; + DWORD desired_access; + DWORD creation_disposition; +} OpenFlags; +static OpenFlags guest_file_open_modes[] = { + {"r", GENERIC_READ, OPEN_EXISTING}, + {"rb", GENERIC_READ, OPEN_EXISTING}, + {"w", GENERIC_WRITE, CREATE_ALWAYS}, + {"wb", GENERIC_WRITE, CREATE_ALWAYS}, + {"a", GENERIC_WRITE, OPEN_ALWAYS }, + {"r+", GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING}, + {"rb+", GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING}, + {"r+b", GENERIC_WRITE|GENERIC_READ, OPEN_EXISTING}, + {"w+", GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS}, + {"wb+", GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS}, + {"w+b", GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS}, + {"a+", GENERIC_WRITE|GENERIC_READ, OPEN_ALWAYS }, + {"ab+", GENERIC_WRITE|GENERIC_READ, OPEN_ALWAYS }, + {"a+b", GENERIC_WRITE|GENERIC_READ, OPEN_ALWAYS } +}; + +static OpenFlags *find_open_flag(const char *mode_str) +{ + int mode; + Error **errp = NULL; + + for (mode = 0; mode < ARRAY_SIZE(guest_file_open_modes); ++mode) { + OpenFlags *flags = guest_file_open_modes + mode; + + if (strcmp(flags->forms, mode_str) == 0) { + return flags; + } + } + + error_setg(errp, "invalid file open mode '%s'", mode_str); + return NULL; +} + +static int64_t guest_file_handle_add(HANDLE fh, Error **errp) +{ + GuestFileHandle *gfh; + int64_t handle; + + handle = ga_get_fd_handle(ga_state, errp); + if (handle < 0) { + return -1; + } + gfh = g_malloc0(sizeof(GuestFileHandle)); + gfh->id = handle; + gfh->fh = fh; + QTAILQ_INSERT_TAIL(&guest_file_state.filehandles, gfh, next); + + return handle; +} + +static GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp) +{ + GuestFileHandle *gfh; + QTAILQ_FOREACH(gfh, &guest_file_state.filehandles, next) { + if (gfh->id == id) { + return gfh; + } + } + error_setg(errp, "handle '%" PRId64 "' has not been found", id); + return NULL; +} + +int64_t qmp_guest_file_open(const char *path, bool has_mode, + const char *mode, Error **errp) +{ + int64_t fd; + HANDLE fh; + HANDLE templ_file = NULL; + DWORD share_mode = FILE_SHARE_READ; + DWORD flags_and_attr = FILE_ATTRIBUTE_NORMAL; + LPSECURITY_ATTRIBUTES sa_attr = NULL; + OpenFlags *guest_flags; + + if (!has_mode) { + mode = "r"; + } + slog("guest-file-open called, filepath: %s, mode: %s", path, mode); + guest_flags = find_open_flag(mode); + if (guest_flags == NULL) { + error_setg(errp, "invalid file open mode"); + return -1; + } + + fh = CreateFile(path, guest_flags->desired_access, share_mode, sa_attr, + guest_flags->creation_disposition, flags_and_attr, + templ_file); + if (fh == INVALID_HANDLE_VALUE) { + error_setg_win32(errp, GetLastError(), "failed to open file '%s'", + path); + return -1; + } + + fd = guest_file_handle_add(fh, errp); + if (fd < 0) { + CloseHandle(&fh); + error_setg(errp, "failed to add handle to qmp handle table"); + return -1; + } + + slog("guest-file-open, handle: % " PRId64, fd); + return fd; +} + +void qmp_guest_file_close(int64_t handle, Error **errp) +{ + bool ret; + GuestFileHandle *gfh = guest_file_handle_find(handle, errp); + slog("guest-file-close called, handle: %" PRId64, handle); + if (gfh == NULL) { + return; + } + ret = CloseHandle(gfh->fh); + if (!ret) { + error_setg_win32(errp, GetLastError(), "failed close handle"); + return; + } + + QTAILQ_REMOVE(&guest_file_state.filehandles, gfh, next); + g_free(gfh); +} + static void acquire_privilege(const char *name, Error **errp) { HANDLE token = NULL; @@ -113,43 +256,130 @@ void qmp_guest_shutdown(bool has_mode, const char *mode, Error **errp) } } -int64_t qmp_guest_file_open(const char *path, bool has_mode, const char *mode, - Error **errp) -{ - error_set(errp, QERR_UNSUPPORTED); - return 0; -} - -void qmp_guest_file_close(int64_t handle, Error **errp) -{ - error_set(errp, QERR_UNSUPPORTED); -} - GuestFileRead *qmp_guest_file_read(int64_t handle, bool has_count, int64_t count, Error **errp) { - error_set(errp, QERR_UNSUPPORTED); - return 0; + GuestFileRead *read_data = NULL; + guchar *buf; + HANDLE fh; + bool is_ok; + DWORD read_count; + GuestFileHandle *gfh = guest_file_handle_find(handle, errp); + + if (!gfh) { + return NULL; + } + if (!has_count) { + count = QGA_READ_COUNT_DEFAULT; + } else if (count < 0) { + error_setg(errp, "value '%" PRId64 + "' is invalid for argument count", count); + return NULL; + } + + fh = gfh->fh; + buf = g_malloc0(count+1); + is_ok = ReadFile(fh, buf, count, &read_count, NULL); + if (!is_ok) { + error_setg_win32(errp, GetLastError(), "failed to read file"); + slog("guest-file-read failed, handle %" PRId64, handle); + } else { + buf[read_count] = 0; + read_data = g_malloc0(sizeof(GuestFileRead)); + read_data->count = (size_t)read_count; + read_data->eof = read_count == 0; + + if (read_count != 0) { + read_data->buf_b64 = g_base64_encode(buf, read_count); + } + } + g_free(buf); + + return read_data; } GuestFileWrite *qmp_guest_file_write(int64_t handle, const char *buf_b64, bool has_count, int64_t count, Error **errp) { - error_set(errp, QERR_UNSUPPORTED); - return 0; + GuestFileWrite *write_data = NULL; + guchar *buf; + gsize buf_len; + bool is_ok; + DWORD write_count; + GuestFileHandle *gfh = guest_file_handle_find(handle, errp); + HANDLE fh; + + if (!gfh) { + return NULL; + } + fh = gfh->fh; + buf = g_base64_decode(buf_b64, &buf_len); + + if (!has_count) { + count = buf_len; + } else if (count < 0 || count > buf_len) { + error_setg(errp, "value '%" PRId64 + "' is invalid for argument count", count); + goto done; + } + + is_ok = WriteFile(fh, buf, count, &write_count, NULL); + if (!is_ok) { + error_setg_win32(errp, GetLastError(), "failed to write to file"); + slog("guest-file-write-failed, handle: %" PRId64, handle); + } else { + write_data = g_malloc0(sizeof(GuestFileWrite)); + write_data->count = (size_t) write_count; + } + +done: + g_free(buf); + return write_data; } GuestFileSeek *qmp_guest_file_seek(int64_t handle, int64_t offset, int64_t whence, Error **errp) { - error_set(errp, QERR_UNSUPPORTED); - return 0; + GuestFileHandle *gfh; + GuestFileSeek *seek_data; + HANDLE fh; + LARGE_INTEGER new_pos, off_pos; + off_pos.QuadPart = offset; + BOOL res; + gfh = guest_file_handle_find(handle, errp); + if (!gfh) { + return NULL; + } + + fh = gfh->fh; + res = SetFilePointerEx(fh, off_pos, &new_pos, whence); + if (!res) { + error_setg_win32(errp, GetLastError(), "failed to seek file"); + return NULL; + } + seek_data = g_new0(GuestFileSeek, 1); + seek_data->position = new_pos.QuadPart; + return seek_data; } void qmp_guest_file_flush(int64_t handle, Error **errp) { - error_set(errp, QERR_UNSUPPORTED); + HANDLE fh; + GuestFileHandle *gfh = guest_file_handle_find(handle, errp); + if (!gfh) { + return; + } + + fh = gfh->fh; + if (!FlushFileBuffers(fh)) { + error_setg_win32(errp, GetLastError(), "failed to flush file"); + } +} + +static void guest_file_init(void) +{ + QTAILQ_INIT(&guest_file_state.filehandles); } GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp) @@ -395,31 +625,31 @@ void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp) FILETIME tf; LONGLONG time; - if (has_time) { - /* Okay, user passed a time to set. Validate it. */ - if (time_ns < 0 || time_ns / 100 > INT64_MAX - W32_FT_OFFSET) { - error_setg(errp, "Time %" PRId64 "is invalid", time_ns); - return; - } + if (!has_time) { + /* Unfortunately, Windows libraries don't provide an easy way to access + * RTC yet: + * + * https://msdn.microsoft.com/en-us/library/aa908981.aspx + */ + error_setg(errp, "Time argument is required on this platform"); + return; + } - time = time_ns / 100 + W32_FT_OFFSET; + /* Validate time passed by user. */ + if (time_ns < 0 || time_ns / 100 > INT64_MAX - W32_FT_OFFSET) { + error_setg(errp, "Time %" PRId64 "is invalid", time_ns); + return; + } - tf.dwLowDateTime = (DWORD) time; - tf.dwHighDateTime = (DWORD) (time >> 32); + time = time_ns / 100 + W32_FT_OFFSET; - if (!FileTimeToSystemTime(&tf, &ts)) { - error_setg(errp, "Failed to convert system time %d", - (int)GetLastError()); - return; - } - } else { - /* Otherwise read the time from RTC which contains the correct value. - * Hopefully. */ - GetSystemTime(&ts); - if (ts.wYear < 1601 || ts.wYear > 30827) { - error_setg(errp, "Failed to get time"); - return; - } + tf.dwLowDateTime = (DWORD) time; + tf.dwHighDateTime = (DWORD) (time >> 32); + + if (!FileTimeToSystemTime(&tf, &ts)) { + error_setg(errp, "Failed to convert system time %d", + (int)GetLastError()); + return; } acquire_privilege(SE_SYSTEMTIME_NAME, &local_err); @@ -446,14 +676,42 @@ int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp) return -1; } +void qmp_guest_set_user_password(const char *username, + const char *password, + bool crypted, + Error **errp) +{ + error_set(errp, QERR_UNSUPPORTED); +} + +GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp) +{ + error_set(errp, QERR_UNSUPPORTED); + return NULL; +} + +GuestMemoryBlockResponseList * +qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp) +{ + error_set(errp, QERR_UNSUPPORTED); + return NULL; +} + +GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp) +{ + error_set(errp, QERR_UNSUPPORTED); + return NULL; +} + /* add unsupported commands to the blacklist */ GList *ga_command_blacklist_init(GList *blacklist) { const char *list_unsupported[] = { - "guest-file-open", "guest-file-close", "guest-file-read", - "guest-file-write", "guest-file-seek", "guest-file-flush", "guest-suspend-hybrid", "guest-network-get-interfaces", "guest-get-vcpus", "guest-set-vcpus", + "guest-set-user-password", + "guest-get-memory-blocks", "guest-set-memory-blocks", + "guest-get-memory-block-size", "guest-fsfreeze-freeze-list", "guest-get-fsinfo", "guest-fstrim", NULL}; char **p = (char **)list_unsupported; @@ -482,4 +740,5 @@ void ga_command_state_init(GAState *s, GACommandState *cs) if (!vss_initialized()) { ga_command_state_add(cs, NULL, guest_fsfreeze_cleanup); } + ga_command_state_add(cs, guest_file_init, NULL); } diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json index 376e79f583..95f49e369c 100644 --- a/qga/qapi-schema.json +++ b/qga/qapi-schema.json @@ -121,7 +121,10 @@ # given value, then sets the Hardware Clock (RTC) to the # current System Time. This will make it easier for a guest # to resynchronize without waiting for NTP. If no @time is -# specified, then the time to set is read from RTC. +# specified, then the time to set is read from RTC. However, +# this may not be supported on all platforms (i.e. Windows). +# If that's the case users are advised to always pass a +# value. # # @time: #optional time of nanoseconds, relative to the Epoch # of 1970-01-01 in UTC. @@ -738,3 +741,153 @@ ## { 'command': 'guest-get-fsinfo', 'returns': ['GuestFilesystemInfo'] } + +## +# @guest-set-user-password +# +# @username: the user account whose password to change +# @password: the new password entry string, base64 encoded +# @crypted: true if password is already crypt()d, false if raw +# +# If the @crypted flag is true, it is the caller's responsibility +# to ensure the correct crypt() encryption scheme is used. This +# command does not attempt to interpret or report on the encryption +# scheme. Refer to the documentation of the guest operating system +# in question to determine what is supported. +# +# Note all guest operating systems will support use of the +# @crypted flag, as they may require the clear-text password +# +# The @password parameter must always be base64 encoded before +# transmission, even if already crypt()d, to ensure it is 8-bit +# safe when passed as JSON. +# +# Returns: Nothing on success. +# +# Since 2.3 +## +{ 'command': 'guest-set-user-password', + 'data': { 'username': 'str', 'password': 'str', 'crypted': 'bool' } } + +# @GuestMemoryBlock: +# +# @phys-index: Arbitrary guest-specific unique identifier of the MEMORY BLOCK. +# +# @online: Whether the MEMORY BLOCK is enabled in guest. +# +# @can-offline: #optional Whether offlining the MEMORY BLOCK is possible. +# This member is always filled in by the guest agent when the +# structure is returned, and always ignored on input (hence it +# can be omitted then). +# +# Since: 2.3 +## +{ 'type': 'GuestMemoryBlock', + 'data': {'phys-index': 'uint64', + 'online': 'bool', + '*can-offline': 'bool'} } + +## +# @guest-get-memory-blocks: +# +# Retrieve the list of the guest's memory blocks. +# +# This is a read-only operation. +# +# Returns: The list of all memory blocks the guest knows about. +# Each memory block is put on the list exactly once, but their order +# is unspecified. +# +# Since: 2.3 +## +{ 'command': 'guest-get-memory-blocks', + 'returns': ['GuestMemoryBlock'] } + +## +# @GuestMemoryBlockResponseType +# +# An enumeration of memory block operation result. +# +# @sucess: the operation of online/offline memory block is successful. +# @not-found: can't find the corresponding memoryXXX directory in sysfs. +# @operation-not-supported: for some old kernels, it does not support +# online or offline memory block. +# @operation-failed: the operation of online/offline memory block fails, +# because of some errors happen. +# +# Since: 2.3 +## +{ 'enum': 'GuestMemoryBlockResponseType', + 'data': ['success', 'not-found', 'operation-not-supported', + 'operation-failed'] } + +## +# @GuestMemoryBlockResponse: +# +# @phys-index: same with the 'phys-index' member of @GuestMemoryBlock. +# +# @response: the result of memory block operation. +# +# @error-code: #optional the error number. +# When memory block operation fails, we assign the value of +# 'errno' to this member, it indicates what goes wrong. +# When the operation succeeds, it will be omitted. +# +# Since: 2.3 +## +{ 'type': 'GuestMemoryBlockResponse', + 'data': { 'phys-index': 'uint64', + 'response': 'GuestMemoryBlockResponseType', + '*error-code': 'int' }} + +## +# @guest-set-memory-blocks: +# +# Attempt to reconfigure (currently: enable/disable) state of memory blocks +# inside the guest. +# +# The input list is processed node by node in order. In each node @phys-index +# is used to look up the guest MEMORY BLOCK, for which @online specifies the +# requested state. The set of distinct @phys-index's is only required to be a +# subset of the guest-supported identifiers. There's no restriction on list +# length or on repeating the same @phys-index (with possibly different @online +# field). +# Preferably the input list should describe a modified subset of +# @guest-get-memory-blocks' return value. +# +# Returns: The operation results, it is a list of @GuestMemoryBlockResponse, +# which is corresponding to the input list. +# +# Note: it will return NULL if the @mem-blks list was empty on input, +# or there is an error, and in this case, guest state will not be +# changed. +# +# Since: 2.3 +## +{ 'command': 'guest-set-memory-blocks', + 'data': {'mem-blks': ['GuestMemoryBlock'] }, + 'returns': ['GuestMemoryBlockResponse'] } + +# @GuestMemoryBlockInfo: +# +# @size: the size (in bytes) of the guest memory blocks, +# which are the minimal units of memory block online/offline +# operations (also called Logical Memory Hotplug). +# +# Since: 2.3 +## +{ 'type': 'GuestMemoryBlockInfo', + 'data': {'size': 'uint64'} } + +## +# @guest-get-memory-block-info: +# +# Get information relating to guest memory blocks. +# +# Returns: memory block size in bytes. +# Returns: @GuestMemoryBlockInfo +# +# Since 2.3 +## +{ 'command': 'guest-get-memory-block-info', + 'returns': 'GuestMemoryBlockInfo' } @@ -387,6 +387,11 @@ static void qmp_change_vnc_listen(const char *target, Error **errp) qemu_opts_del(opts); } opts = vnc_parse_func(target); + if (!opts) { + return; + } + + vnc_auto_assign_id(olist, opts); vnc_display_open("default", errp); } @@ -821,7 +821,7 @@ void qemu_savevm_state_cancel(void) } } -static int qemu_savevm_state(QEMUFile *f) +static int qemu_savevm_state(QEMUFile *f, Error **errp) { int ret; MigrationParams params = { @@ -829,7 +829,7 @@ static int qemu_savevm_state(QEMUFile *f) .shared = 0 }; - if (qemu_savevm_state_blocked(NULL)) { + if (qemu_savevm_state_blocked(errp)) { return -EINVAL; } @@ -850,6 +850,7 @@ static int qemu_savevm_state(QEMUFile *f) } if (ret != 0) { qemu_savevm_state_cancel(); + error_setg_errno(errp, -ret, "Error while writing VM state"); } return ret; } @@ -1102,6 +1103,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict) qemu_timeval tv; struct tm tm; const char *name = qdict_get_try_str(qdict, "name"); + Error *local_err = NULL; /* Verify if there is a device that doesn't support snapshots and is writable */ bs = NULL; @@ -1160,11 +1162,12 @@ void hmp_savevm(Monitor *mon, const QDict *qdict) monitor_printf(mon, "Could not open VM state file\n"); goto the_end; } - ret = qemu_savevm_state(f); + ret = qemu_savevm_state(f, &local_err); vm_state_size = qemu_ftell(f); qemu_fclose(f); if (ret < 0) { - monitor_printf(mon, "Error %d while writing VM\n", ret); + monitor_printf(mon, "%s\n", error_get_pretty(local_err)); + error_free(local_err); goto the_end; } diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 1ed8b67883..dc8e44acf8 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -108,16 +108,16 @@ shape and this command should mostly work.""" assert (val["hi"] == 0) return val["lo"] - def qtailq_foreach(self, head, field_str): - var_p = head["tqh_first"] + def qlist_foreach(self, head, field_str): + var_p = head["lh_first"] while (var_p != 0): var = var_p.dereference() yield var - var_p = var[field_str]["tqe_next"] + var_p = var[field_str]["le_next"] def qemu_get_ram_block(self, ram_addr): ram_blocks = gdb.parse_and_eval("ram_list.blocks") - for block in self.qtailq_foreach(ram_blocks, "next"): + for block in self.qlist_foreach(ram_blocks, "next"): if (ram_addr - block["offset"] < block["length"]): return block raise gdb.GdbError("Bad ram offset %x" % ram_addr) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index c0c4ff0de3..c65cabda5a 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -145,6 +145,45 @@ svm_exit_reasons = { 0x400: 'NPF', } +# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h) +aarch64_exit_reasons = { + 0x00: 'UNKNOWN', + 0x01: 'WFI', + 0x03: 'CP15_32', + 0x04: 'CP15_64', + 0x05: 'CP14_MR', + 0x06: 'CP14_LS', + 0x07: 'FP_ASIMD', + 0x08: 'CP10_ID', + 0x0C: 'CP14_64', + 0x0E: 'ILL_ISS', + 0x11: 'SVC32', + 0x12: 'HVC32', + 0x13: 'SMC32', + 0x15: 'SVC64', + 0x16: 'HVC64', + 0x17: 'SMC64', + 0x18: 'SYS64', + 0x20: 'IABT', + 0x21: 'IABT_HYP', + 0x22: 'PC_ALIGN', + 0x24: 'DABT', + 0x25: 'DABT_HYP', + 0x26: 'SP_ALIGN', + 0x28: 'FP_EXC32', + 0x2C: 'FP_EXC64', + 0x2F: 'SERROR', + 0x30: 'BREAKPT', + 0x31: 'BREAKPT_HYP', + 0x32: 'SOFTSTP', + 0x33: 'SOFTSTP_HYP', + 0x34: 'WATCHPT', + 0x35: 'WATCHPT_HYP', + 0x38: 'BKPT32', + 0x3A: 'VECTOR32', + 0x3C: 'BRK64', +} + # From include/uapi/linux/kvm.h, KVM_EXIT_xxx userspace_exit_reasons = { 0: 'UNKNOWN', @@ -212,7 +251,8 @@ def ppc_init(): def aarch64_init(): globals().update({ - 'sc_perf_evt_open' : 241 + 'sc_perf_evt_open' : 241, + 'exit_reasons' : aarch64_exit_reasons, }) def detect_platform(): diff --git a/scripts/qtest.py b/scripts/qtest.py new file mode 100644 index 0000000000..a9714453a2 --- /dev/null +++ b/scripts/qtest.py @@ -0,0 +1,71 @@ +# QEMU qtest library +# +# Copyright (C) 2015 Red Hat Inc. +# +# Authors: +# Fam Zheng <famz@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. +# +# Based on qmp.py. +# + +import errno +import socket + +class QEMUQtestProtocol(object): + def __init__(self, address, server=False): + """ + Create a QEMUQtestProtocol object. + + @param address: QEMU address, can be either a unix socket path (string) + or a tuple in the form ( address, port ) for a TCP + connection + @param server: server mode, listens on the socket (bool) + @raise socket.error on socket connection errors + @note No connection is established, this is done by the connect() or + accept() methods + """ + self._address = address + self._sock = self._get_sock() + if server: + self._sock.bind(self._address) + self._sock.listen(1) + + def _get_sock(self): + if isinstance(self._address, tuple): + family = socket.AF_INET + else: + family = socket.AF_UNIX + return socket.socket(family, socket.SOCK_STREAM) + + def connect(self): + """ + Connect to the qtest socket. + + @raise socket.error on socket connection errors + """ + self._sock.connect(self._address) + + def accept(self): + """ + Await connection from QEMU. + + @raise socket.error on socket connection errors + """ + self._sock, _ = self._sock.accept() + + def cmd(self, qtest_cmd): + """ + Send a qtest command on the wire. + + @param qtest_cmd: qtest command text to be sent + """ + self._sock.sendall(qtest_cmd + "\n") + + def close(self): + self._sock.close() + + def settimeout(self, timeout): + self._sock.settimeout(timeout) diff --git a/softmmu_template.h b/softmmu_template.h index 6b4e615dbf..0e3dd35fe1 100644 --- a/softmmu_template.h +++ b/softmmu_template.h @@ -149,7 +149,7 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, { uint64_t val; CPUState *cpu = ENV_GET_CPU(env); - MemoryRegion *mr = iotlb_to_region(cpu->as, physaddr); + MemoryRegion *mr = iotlb_to_region(cpu, physaddr); physaddr = (physaddr & TARGET_PAGE_MASK) + addr; cpu->mem_io_pc = retaddr; @@ -369,7 +369,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, uintptr_t retaddr) { CPUState *cpu = ENV_GET_CPU(env); - MemoryRegion *mr = iotlb_to_region(cpu->as, physaddr); + MemoryRegion *mr = iotlb_to_region(cpu, physaddr); physaddr = (physaddr & TARGET_PAGE_MASK) + addr; if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu_can_do_io(cpu)) { diff --git a/target-mips/machine.c b/target-mips/machine.c index 0ba7d736db..6c76dfbe03 100644 --- a/target-mips/machine.c +++ b/target-mips/machine.c @@ -285,6 +285,10 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) qemu_get_sbe32s(f, &env->CP0_SRSConf4); qemu_get_sbe32s(f, &env->CP0_HWREna); qemu_get_betls(f, &env->CP0_BadVAddr); + if (version_id >= 5) { + qemu_get_be32s(f, &env->CP0_BadInstr); + qemu_get_be32s(f, &env->CP0_BadInstrP); + } qemu_get_sbe32s(f, &env->CP0_Count); qemu_get_betls(f, &env->CP0_EntryHi); qemu_get_sbe32s(f, &env->CP0_Compare); @@ -319,8 +323,6 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) qemu_get_betls(f, &env->CP0_ErrorEPC); qemu_get_sbe32s(f, &env->CP0_DESAVE); if (version_id >= 5) { - qemu_get_be32s(f, &env->CP0_BadInstr); - qemu_get_be32s(f, &env->CP0_BadInstrP); for (i = 0; i < MIPS_KSCRATCH_NUM; i++) { qemu_get_betls(f, &env->CP0_KScratch[i]); } diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c index ea7d95f36c..73a8e458fc 100644 --- a/target-mips/op_helper.c +++ b/target-mips/op_helper.c @@ -304,16 +304,20 @@ static inline hwaddr do_translate_address(CPUMIPSState *env, } } -#define HELPER_LD_ATOMIC(name, insn) \ +#define HELPER_LD_ATOMIC(name, insn, almask) \ target_ulong helper_##name(CPUMIPSState *env, target_ulong arg, int mem_idx) \ { \ + if (arg & almask) { \ + env->CP0_BadVAddr = arg; \ + helper_raise_exception(env, EXCP_AdEL); \ + } \ env->lladdr = do_translate_address(env, arg, 0); \ env->llval = do_##insn(env, arg, mem_idx); \ return env->llval; \ } -HELPER_LD_ATOMIC(ll, lw) +HELPER_LD_ATOMIC(ll, lw, 0x3) #ifdef TARGET_MIPS64 -HELPER_LD_ATOMIC(lld, ld) +HELPER_LD_ATOMIC(lld, ld, 0x7) #endif #undef HELPER_LD_ATOMIC diff --git a/target-mips/translate.c b/target-mips/translate.c index 881e7fb6d6..ca51149872 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -4947,7 +4947,7 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel) #if defined(TARGET_MIPS64) if (ctx->rxi) { TCGv tmp = tcg_temp_new(); - tcg_gen_andi_tl(tmp, arg, (3ull << 62)); + tcg_gen_andi_tl(tmp, arg, (3ull << CP0EnLo_XI)); tcg_gen_shri_tl(tmp, tmp, 32); tcg_gen_or_tl(arg, arg, tmp); tcg_temp_free(tmp); @@ -5002,7 +5002,7 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel) #if defined(TARGET_MIPS64) if (ctx->rxi) { TCGv tmp = tcg_temp_new(); - tcg_gen_andi_tl(tmp, arg, (3ull << 62)); + tcg_gen_andi_tl(tmp, arg, (3ull << CP0EnLo_XI)); tcg_gen_shri_tl(tmp, tmp, 32); tcg_gen_or_tl(arg, arg, tmp); tcg_temp_free(tmp); @@ -13653,7 +13653,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx, target. */ break; case LUI: - gen_logic_imm(ctx, OPC_LUI, rs, -1, imm); + gen_logic_imm(ctx, OPC_LUI, rs, 0, imm); break; case SYNCI: /* Break the TB to be able to sync copied instructions @@ -19095,6 +19095,7 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb, CPUMIPSState *env = &cpu->env; DisasContext ctx; target_ulong pc_start; + target_ulong next_page_start; CPUBreakpoint *bp; int j, lj = -1; int num_insns; @@ -19106,6 +19107,7 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb, qemu_log("search pc %d\n", search_pc); pc_start = tb->pc; + next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; ctx.pc = pc_start; ctx.saved_pc = -1; ctx.singlestep_enabled = cs->singlestep_enabled; @@ -19204,8 +19206,9 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb, break; } - if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0) + if (ctx.pc >= next_page_start) { break; + } if (tcg_op_buf_full()) { break; diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c index 1543f6c388..9e8433a919 100644 --- a/target-mips/translate_init.c +++ b/target-mips/translate_init.c @@ -474,7 +474,7 @@ static const mips_def_t mips_defs[] = .CP0_LLAddr_shift = 4, .SYNCI_Step = 32, .CCRes = 2, - .CP0_Status_rw_bitmask = 0x32F8FFFF, + .CP0_Status_rw_bitmask = 0x12F8FFFF, .SEGBITS = 42, .PABITS = 36, .insn_flags = CPU_MIPS64, @@ -575,7 +575,7 @@ static const mips_def_t mips_defs[] = .CP0_LLAddr_shift = 4, .SYNCI_Step = 32, .CCRes = 2, - .CP0_Status_rw_bitmask = 0x32F8FFFF, + .CP0_Status_rw_bitmask = 0x12F8FFFF, .SEGBITS = 42, .PABITS = 36, .insn_flags = CPU_MIPS64R2, diff --git a/tests/Makefile b/tests/Makefile index d5df16882d..307035c26c 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -62,6 +62,8 @@ check-unit-y += tests/test-int128$(EXESUF) gcov-files-test-int128-y = check-unit-y += tests/rcutorture$(EXESUF) gcov-files-rcutorture-y = util/rcu.c +check-unit-y += tests/test-rcu-list$(EXESUF) +gcov-files-test-rcu-list-y = util/rcu.c check-unit-y += tests/test-bitops$(EXESUF) check-unit-$(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) += tests/test-qdev-global-props$(EXESUF) check-unit-y += tests/check-qom-interface$(EXESUF) @@ -228,7 +230,7 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \ tests/test-qmp-commands.o tests/test-visitor-serialization.o \ tests/test-x86-cpuid.o tests/test-mul64.o tests/test-int128.o \ tests/test-opts-visitor.o tests/test-qmp-event.o \ - tests/rcutorture.o + tests/rcutorture.o tests/test-rcu-list.o test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o \ tests/test-qapi-event.o @@ -257,7 +259,8 @@ tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o tests/test-xbzrle$(EXESUF): tests/test-xbzrle.o migration/xbzrle.o page_cache.o libqemuutil.a tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o tests/test-int128$(EXESUF): tests/test-int128.o -tests/rcutorture$(EXESUF): tests/rcutorture.o libqemuutil.a +tests/rcutorture$(EXESUF): tests/rcutorture.o libqemuutil.a libqemustub.a +tests/test-rcu-list$(EXESUF): tests/test-rcu-list.o libqemuutil.a libqemustub.a tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\ @@ -307,9 +310,10 @@ tests/test-mul64$(EXESUF): tests/test-mul64.o libqemuutil.a tests/test-bitops$(EXESUF): tests/test-bitops.o libqemuutil.a libqos-obj-y = tests/libqos/pci.o tests/libqos/fw_cfg.o tests/libqos/malloc.o -libqos-obj-y += tests/libqos/i2c.o +libqos-obj-y += tests/libqos/i2c.o tests/libqos/libqos.o libqos-pc-obj-y = $(libqos-obj-y) tests/libqos/pci-pc.o -libqos-pc-obj-y += tests/libqos/malloc-pc.o +libqos-pc-obj-y += tests/libqos/malloc-pc.o tests/libqos/libqos-pc.o +libqos-pc-obj-y += tests/libqos/ahci.o libqos-omap-obj-y = $(libqos-obj-y) tests/libqos/i2c-omap.o libqos-virtio-obj-y = $(libqos-obj-y) $(libqos-pc-obj-y) tests/libqos/virtio.o tests/libqos/virtio-pci.o libqos-usb-obj-y = $(libqos-pc-obj-y) tests/libqos/usb.o diff --git a/tests/ahci-test.c b/tests/ahci-test.c index b1a59f21a7..53fd068c8a 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -29,8 +29,9 @@ #include <glib.h> #include "libqtest.h" +#include "libqos/libqos-pc.h" +#include "libqos/ahci.h" #include "libqos/pci-pc.h" -#include "libqos/malloc-pc.h" #include "qemu-common.h" #include "qemu/host-utils.h" @@ -41,361 +42,18 @@ /* Test-specific defines. */ #define TEST_IMAGE_SIZE (64 * 1024 * 1024) -/*** Supplementary PCI Config Space IDs & Masks ***/ -#define PCI_DEVICE_ID_INTEL_Q35_AHCI (0x2922) -#define PCI_MSI_FLAGS_RESERVED (0xFF00) -#define PCI_PM_CTRL_RESERVED (0xFC) -#define PCI_BCC(REG32) ((REG32) >> 24) -#define PCI_PI(REG32) (((REG32) >> 8) & 0xFF) -#define PCI_SCC(REG32) (((REG32) >> 16) & 0xFF) - -/*** Recognized AHCI Device Types ***/ -#define AHCI_INTEL_ICH9 (PCI_DEVICE_ID_INTEL_Q35_AHCI << 16 | \ - PCI_VENDOR_ID_INTEL) - -/*** AHCI/HBA Register Offsets and Bitmasks ***/ -#define AHCI_CAP (0) -#define AHCI_CAP_NP (0x1F) -#define AHCI_CAP_SXS (0x20) -#define AHCI_CAP_EMS (0x40) -#define AHCI_CAP_CCCS (0x80) -#define AHCI_CAP_NCS (0x1F00) -#define AHCI_CAP_PSC (0x2000) -#define AHCI_CAP_SSC (0x4000) -#define AHCI_CAP_PMD (0x8000) -#define AHCI_CAP_FBSS (0x10000) -#define AHCI_CAP_SPM (0x20000) -#define AHCI_CAP_SAM (0x40000) -#define AHCI_CAP_RESERVED (0x80000) -#define AHCI_CAP_ISS (0xF00000) -#define AHCI_CAP_SCLO (0x1000000) -#define AHCI_CAP_SAL (0x2000000) -#define AHCI_CAP_SALP (0x4000000) -#define AHCI_CAP_SSS (0x8000000) -#define AHCI_CAP_SMPS (0x10000000) -#define AHCI_CAP_SSNTF (0x20000000) -#define AHCI_CAP_SNCQ (0x40000000) -#define AHCI_CAP_S64A (0x80000000) - -#define AHCI_GHC (1) -#define AHCI_GHC_HR (0x01) -#define AHCI_GHC_IE (0x02) -#define AHCI_GHC_MRSM (0x04) -#define AHCI_GHC_RESERVED (0x7FFFFFF8) -#define AHCI_GHC_AE (0x80000000) - -#define AHCI_IS (2) -#define AHCI_PI (3) -#define AHCI_VS (4) - -#define AHCI_CCCCTL (5) -#define AHCI_CCCCTL_EN (0x01) -#define AHCI_CCCCTL_RESERVED (0x06) -#define AHCI_CCCCTL_CC (0xFF00) -#define AHCI_CCCCTL_TV (0xFFFF0000) - -#define AHCI_CCCPORTS (6) -#define AHCI_EMLOC (7) - -#define AHCI_EMCTL (8) -#define AHCI_EMCTL_STSMR (0x01) -#define AHCI_EMCTL_CTLTM (0x100) -#define AHCI_EMCTL_CTLRST (0x200) -#define AHCI_EMCTL_RESERVED (0xF0F0FCFE) - -#define AHCI_CAP2 (9) -#define AHCI_CAP2_BOH (0x01) -#define AHCI_CAP2_NVMP (0x02) -#define AHCI_CAP2_APST (0x04) -#define AHCI_CAP2_RESERVED (0xFFFFFFF8) - -#define AHCI_BOHC (10) -#define AHCI_RESERVED (11) -#define AHCI_NVMHCI (24) -#define AHCI_VENDOR (40) -#define AHCI_PORTS (64) - -/*** Port Memory Offsets & Bitmasks ***/ -#define AHCI_PX_CLB (0) -#define AHCI_PX_CLB_RESERVED (0x1FF) - -#define AHCI_PX_CLBU (1) - -#define AHCI_PX_FB (2) -#define AHCI_PX_FB_RESERVED (0xFF) - -#define AHCI_PX_FBU (3) - -#define AHCI_PX_IS (4) -#define AHCI_PX_IS_DHRS (0x1) -#define AHCI_PX_IS_PSS (0x2) -#define AHCI_PX_IS_DSS (0x4) -#define AHCI_PX_IS_SDBS (0x8) -#define AHCI_PX_IS_UFS (0x10) -#define AHCI_PX_IS_DPS (0x20) -#define AHCI_PX_IS_PCS (0x40) -#define AHCI_PX_IS_DMPS (0x80) -#define AHCI_PX_IS_RESERVED (0x23FFF00) -#define AHCI_PX_IS_PRCS (0x400000) -#define AHCI_PX_IS_IPMS (0x800000) -#define AHCI_PX_IS_OFS (0x1000000) -#define AHCI_PX_IS_INFS (0x4000000) -#define AHCI_PX_IS_IFS (0x8000000) -#define AHCI_PX_IS_HBDS (0x10000000) -#define AHCI_PX_IS_HBFS (0x20000000) -#define AHCI_PX_IS_TFES (0x40000000) -#define AHCI_PX_IS_CPDS (0x80000000) - -#define AHCI_PX_IE (5) -#define AHCI_PX_IE_DHRE (0x1) -#define AHCI_PX_IE_PSE (0x2) -#define AHCI_PX_IE_DSE (0x4) -#define AHCI_PX_IE_SDBE (0x8) -#define AHCI_PX_IE_UFE (0x10) -#define AHCI_PX_IE_DPE (0x20) -#define AHCI_PX_IE_PCE (0x40) -#define AHCI_PX_IE_DMPE (0x80) -#define AHCI_PX_IE_RESERVED (0x23FFF00) -#define AHCI_PX_IE_PRCE (0x400000) -#define AHCI_PX_IE_IPME (0x800000) -#define AHCI_PX_IE_OFE (0x1000000) -#define AHCI_PX_IE_INFE (0x4000000) -#define AHCI_PX_IE_IFE (0x8000000) -#define AHCI_PX_IE_HBDE (0x10000000) -#define AHCI_PX_IE_HBFE (0x20000000) -#define AHCI_PX_IE_TFEE (0x40000000) -#define AHCI_PX_IE_CPDE (0x80000000) - -#define AHCI_PX_CMD (6) -#define AHCI_PX_CMD_ST (0x1) -#define AHCI_PX_CMD_SUD (0x2) -#define AHCI_PX_CMD_POD (0x4) -#define AHCI_PX_CMD_CLO (0x8) -#define AHCI_PX_CMD_FRE (0x10) -#define AHCI_PX_CMD_RESERVED (0xE0) -#define AHCI_PX_CMD_CCS (0x1F00) -#define AHCI_PX_CMD_MPSS (0x2000) -#define AHCI_PX_CMD_FR (0x4000) -#define AHCI_PX_CMD_CR (0x8000) -#define AHCI_PX_CMD_CPS (0x10000) -#define AHCI_PX_CMD_PMA (0x20000) -#define AHCI_PX_CMD_HPCP (0x40000) -#define AHCI_PX_CMD_MPSP (0x80000) -#define AHCI_PX_CMD_CPD (0x100000) -#define AHCI_PX_CMD_ESP (0x200000) -#define AHCI_PX_CMD_FBSCP (0x400000) -#define AHCI_PX_CMD_APSTE (0x800000) -#define AHCI_PX_CMD_ATAPI (0x1000000) -#define AHCI_PX_CMD_DLAE (0x2000000) -#define AHCI_PX_CMD_ALPE (0x4000000) -#define AHCI_PX_CMD_ASP (0x8000000) -#define AHCI_PX_CMD_ICC (0xF0000000) - -#define AHCI_PX_RES1 (7) - -#define AHCI_PX_TFD (8) -#define AHCI_PX_TFD_STS (0xFF) -#define AHCI_PX_TFD_STS_ERR (0x01) -#define AHCI_PX_TFD_STS_CS1 (0x06) -#define AHCI_PX_TFD_STS_DRQ (0x08) -#define AHCI_PX_TFD_STS_CS2 (0x70) -#define AHCI_PX_TFD_STS_BSY (0x80) -#define AHCI_PX_TFD_ERR (0xFF00) -#define AHCI_PX_TFD_RESERVED (0xFFFF0000) - -#define AHCI_PX_SIG (9) -#define AHCI_PX_SIG_SECTOR_COUNT (0xFF) -#define AHCI_PX_SIG_LBA_LOW (0xFF00) -#define AHCI_PX_SIG_LBA_MID (0xFF0000) -#define AHCI_PX_SIG_LBA_HIGH (0xFF000000) - -#define AHCI_PX_SSTS (10) -#define AHCI_PX_SSTS_DET (0x0F) -#define AHCI_PX_SSTS_SPD (0xF0) -#define AHCI_PX_SSTS_IPM (0xF00) -#define AHCI_PX_SSTS_RESERVED (0xFFFFF000) -#define SSTS_DET_NO_DEVICE (0x00) -#define SSTS_DET_PRESENT (0x01) -#define SSTS_DET_ESTABLISHED (0x03) -#define SSTS_DET_OFFLINE (0x04) - -#define AHCI_PX_SCTL (11) - -#define AHCI_PX_SERR (12) -#define AHCI_PX_SERR_ERR (0xFFFF) -#define AHCI_PX_SERR_DIAG (0xFFFF0000) -#define AHCI_PX_SERR_DIAG_X (0x04000000) - -#define AHCI_PX_SACT (13) -#define AHCI_PX_CI (14) -#define AHCI_PX_SNTF (15) - -#define AHCI_PX_FBS (16) -#define AHCI_PX_FBS_EN (0x1) -#define AHCI_PX_FBS_DEC (0x2) -#define AHCI_PX_FBS_SDE (0x4) -#define AHCI_PX_FBS_DEV (0xF00) -#define AHCI_PX_FBS_ADO (0xF000) -#define AHCI_PX_FBS_DWE (0xF0000) -#define AHCI_PX_FBS_RESERVED (0xFFF000F8) - -#define AHCI_PX_RES2 (17) -#define AHCI_PX_VS (28) - -#define HBA_DATA_REGION_SIZE (256) -#define HBA_PORT_DATA_SIZE (128) -#define HBA_PORT_NUM_REG (HBA_PORT_DATA_SIZE/4) - -#define AHCI_VERSION_0_95 (0x00000905) -#define AHCI_VERSION_1_0 (0x00010000) -#define AHCI_VERSION_1_1 (0x00010100) -#define AHCI_VERSION_1_2 (0x00010200) -#define AHCI_VERSION_1_3 (0x00010300) - -/*** Structures ***/ - -/** - * Generic FIS structure. - */ -typedef struct FIS { - uint8_t fis_type; - uint8_t flags; - char data[0]; -} __attribute__((__packed__)) FIS; - -/** - * Register device-to-host FIS structure. - */ -typedef struct RegD2HFIS { - /* DW0 */ - uint8_t fis_type; - uint8_t flags; - uint8_t status; - uint8_t error; - /* DW1 */ - uint8_t lba_low; - uint8_t lba_mid; - uint8_t lba_high; - uint8_t device; - /* DW2 */ - uint8_t lba3; - uint8_t lba4; - uint8_t lba5; - uint8_t res1; - /* DW3 */ - uint16_t count; - uint8_t res2; - uint8_t res3; - /* DW4 */ - uint16_t res4; - uint16_t res5; -} __attribute__((__packed__)) RegD2HFIS; - -/** - * Register host-to-device FIS structure. - */ -typedef struct RegH2DFIS { - /* DW0 */ - uint8_t fis_type; - uint8_t flags; - uint8_t command; - uint8_t feature_low; - /* DW1 */ - uint8_t lba_low; - uint8_t lba_mid; - uint8_t lba_high; - uint8_t device; - /* DW2 */ - uint8_t lba3; - uint8_t lba4; - uint8_t lba5; - uint8_t feature_high; - /* DW3 */ - uint16_t count; - uint8_t icc; - uint8_t control; - /* DW4 */ - uint32_t aux; -} __attribute__((__packed__)) RegH2DFIS; - -/** - * Command List entry structure. - * The command list contains between 1-32 of these structures. - */ -typedef struct AHCICommand { - uint8_t b1; - uint8_t b2; - uint16_t prdtl; /* Phys Region Desc. Table Length */ - uint32_t prdbc; /* Phys Region Desc. Byte Count */ - uint32_t ctba; /* Command Table Descriptor Base Address */ - uint32_t ctbau; /* '' Upper */ - uint32_t res[4]; -} __attribute__((__packed__)) AHCICommand; - -/** - * Physical Region Descriptor; pointed to by the Command List Header, - * struct ahci_command. - */ -typedef struct PRD { - uint32_t dba; /* Data Base Address */ - uint32_t dbau; /* Data Base Address Upper */ - uint32_t res; /* Reserved */ - uint32_t dbc; /* Data Byte Count (0-indexed) & Interrupt Flag (bit 2^31) */ -} PRD; - -typedef struct HBACap { - uint32_t cap; - uint32_t cap2; -} HBACap; - /*** Globals ***/ -static QGuestAllocator *guest_malloc; -static QPCIBus *pcibus; -static uint64_t barsize; static char tmp_path[] = "/tmp/qtest.XXXXXX"; static bool ahci_pedantic; -static uint32_t ahci_fingerprint; - -/*** Macro Utilities ***/ -#define BITANY(data, mask) (((data) & (mask)) != 0) -#define BITSET(data, mask) (((data) & (mask)) == (mask)) -#define BITCLR(data, mask) (((data) & (mask)) == 0) -#define ASSERT_BIT_SET(data, mask) g_assert_cmphex((data) & (mask), ==, (mask)) -#define ASSERT_BIT_CLEAR(data, mask) g_assert_cmphex((data) & (mask), ==, 0) - -/*** IO macros for the AHCI memory registers. ***/ -#define AHCI_READ(OFST) qpci_io_readl(ahci, hba_base + (OFST)) -#define AHCI_WRITE(OFST, VAL) qpci_io_writel(ahci, hba_base + (OFST), (VAL)) -#define AHCI_RREG(regno) AHCI_READ(4 * (regno)) -#define AHCI_WREG(regno, val) AHCI_WRITE(4 * (regno), (val)) -#define AHCI_SET(regno, mask) AHCI_WREG((regno), AHCI_RREG(regno) | (mask)) -#define AHCI_CLR(regno, mask) AHCI_WREG((regno), AHCI_RREG(regno) & ~(mask)) - -/*** IO macros for port-specific offsets inside of AHCI memory. ***/ -#define PX_OFST(port, regno) (HBA_PORT_NUM_REG * (port) + AHCI_PORTS + (regno)) -#define PX_RREG(port, regno) AHCI_RREG(PX_OFST((port), (regno))) -#define PX_WREG(port, regno, val) AHCI_WREG(PX_OFST((port), (regno)), (val)) -#define PX_SET(port, reg, mask) PX_WREG((port), (reg), \ - PX_RREG((port), (reg)) | (mask)); -#define PX_CLR(port, reg, mask) PX_WREG((port), (reg), \ - PX_RREG((port), (reg)) & ~(mask)); - -/* For calculating how big the PRD table needs to be: */ -#define CMD_TBL_SIZ(n) ((0x80 + ((n) * sizeof(PRD)) + 0x7F) & ~0x7F) - /*** Function Declarations ***/ -static QPCIDevice *get_ahci_device(void); -static QPCIDevice *start_ahci_device(QPCIDevice *dev, void **hba_base); -static void free_ahci_device(QPCIDevice *dev); -static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base, - HBACap *hcap, uint8_t port); -static void ahci_test_pci_spec(QPCIDevice *ahci); -static void ahci_test_pci_caps(QPCIDevice *ahci, uint16_t header, +static void ahci_test_port_spec(AHCIQState *ahci, uint8_t port); +static void ahci_test_pci_spec(AHCIQState *ahci); +static void ahci_test_pci_caps(AHCIQState *ahci, uint16_t header, uint8_t offset); -static void ahci_test_satacap(QPCIDevice *ahci, uint8_t offset); -static void ahci_test_msicap(QPCIDevice *ahci, uint8_t offset); -static void ahci_test_pmcap(QPCIDevice *ahci, uint8_t offset); +static void ahci_test_satacap(AHCIQState *ahci, uint8_t offset); +static void ahci_test_msicap(AHCIQState *ahci, uint8_t offset); +static void ahci_test_pmcap(AHCIQState *ahci, uint8_t offset); /*** Utilities ***/ @@ -410,274 +68,43 @@ static void string_bswap16(uint16_t *s, size_t bytes) } } -/** - * Locate, verify, and return a handle to the AHCI device. - */ -static QPCIDevice *get_ahci_device(void) -{ - QPCIDevice *ahci; - - pcibus = qpci_init_pc(); - - /* Find the AHCI PCI device and verify it's the right one. */ - ahci = qpci_device_find(pcibus, QPCI_DEVFN(0x1F, 0x02)); - g_assert(ahci != NULL); - - ahci_fingerprint = qpci_config_readl(ahci, PCI_VENDOR_ID); - - switch (ahci_fingerprint) { - case AHCI_INTEL_ICH9: - break; - default: - /* Unknown device. */ - g_assert_not_reached(); - } - - return ahci; -} - -static void free_ahci_device(QPCIDevice *ahci) -{ - /* libqos doesn't have a function for this, so free it manually */ - g_free(ahci); - - if (pcibus) { - qpci_free_pc(pcibus); - pcibus = NULL; - } - - /* Clear our cached barsize information. */ - barsize = 0; -} - /*** Test Setup & Teardown ***/ /** - * Launch QEMU with the given command line, - * and then set up interrupts and our guest malloc interface. - */ -static void qtest_boot(const char *cmdline_fmt, ...) -{ - va_list ap; - char *cmdline; - - va_start(ap, cmdline_fmt); - cmdline = g_strdup_vprintf(cmdline_fmt, ap); - va_end(ap); - - qtest_start(cmdline); - qtest_irq_intercept_in(global_qtest, "ioapic"); - guest_malloc = pc_alloc_init(); - - g_free(cmdline); -} - -/** - * Tear down the QEMU instance. - */ -static void qtest_shutdown(void) -{ - g_free(guest_malloc); - guest_malloc = NULL; - qtest_end(); -} - -/** * Start a Q35 machine and bookmark a handle to the AHCI device. */ -static QPCIDevice *ahci_boot(void) -{ - qtest_boot("-drive if=none,id=drive0,file=%s,cache=writeback,serial=%s," - "format=raw" - " -M q35 " - "-device ide-hd,drive=drive0 " - "-global ide-hd.ver=%s", - tmp_path, "testdisk", "version"); - - /* Verify that we have an AHCI device present. */ - return get_ahci_device(); -} - -/** - * Clean up the PCI device, then terminate the QEMU instance. - */ -static void ahci_shutdown(QPCIDevice *ahci) -{ - free_ahci_device(ahci); - qtest_shutdown(); -} - -/*** Logical Device Initialization ***/ - -/** - * Start the PCI device and sanity-check default operation. - */ -static void ahci_pci_enable(QPCIDevice *ahci, void **hba_base) +static AHCIQState *ahci_boot(void) { - uint8_t reg; + AHCIQState *s; + const char *cli; - start_ahci_device(ahci, hba_base); + s = g_malloc0(sizeof(AHCIQState)); - switch (ahci_fingerprint) { - case AHCI_INTEL_ICH9: - /* ICH9 has a register at PCI 0x92 that - * acts as a master port enabler mask. */ - reg = qpci_config_readb(ahci, 0x92); - reg |= 0x3F; - qpci_config_writeb(ahci, 0x92, reg); - /* 0...0111111b -- bit significant, ports 0-5 enabled. */ - ASSERT_BIT_SET(qpci_config_readb(ahci, 0x92), 0x3F); - break; - } - -} + cli = "-drive if=none,id=drive0,file=%s,cache=writeback,serial=%s" + ",format=raw" + " -M q35 " + "-device ide-hd,drive=drive0 " + "-global ide-hd.ver=%s"; + s->parent = qtest_pc_boot(cli, tmp_path, "testdisk", "version"); + alloc_set_flags(s->parent->alloc, ALLOC_LEAK_ASSERT); -/** - * Map BAR5/ABAR, and engage the PCI device. - */ -static QPCIDevice *start_ahci_device(QPCIDevice *ahci, void **hba_base) -{ - /* Map AHCI's ABAR (BAR5) */ - *hba_base = qpci_iomap(ahci, 5, &barsize); - - /* turns on pci.cmd.iose, pci.cmd.mse and pci.cmd.bme */ - qpci_device_enable(ahci); + /* Verify that we have an AHCI device present. */ + s->dev = get_ahci_device(&s->fingerprint); - return ahci; + return s; } /** - * Test and initialize the AHCI's HBA memory areas. - * Initialize and start any ports with devices attached. - * Bring the HBA into the idle state. + * Clean up the PCI device, then terminate the QEMU instance. */ -static void ahci_hba_enable(QPCIDevice *ahci, void *hba_base) +static void ahci_shutdown(AHCIQState *ahci) { - /* Bits of interest in this section: - * GHC.AE Global Host Control / AHCI Enable - * PxCMD.ST Port Command: Start - * PxCMD.SUD "Spin Up Device" - * PxCMD.POD "Power On Device" - * PxCMD.FRE "FIS Receive Enable" - * PxCMD.FR "FIS Receive Running" - * PxCMD.CR "Command List Running" - */ - - g_assert(ahci != NULL); - g_assert(hba_base != NULL); - - uint32_t reg, ports_impl, clb, fb; - uint16_t i; - uint8_t num_cmd_slots; - - g_assert(hba_base != 0); - - /* Set GHC.AE to 1 */ - AHCI_SET(AHCI_GHC, AHCI_GHC_AE); - reg = AHCI_RREG(AHCI_GHC); - ASSERT_BIT_SET(reg, AHCI_GHC_AE); - - /* Read CAP.NCS, how many command slots do we have? */ - reg = AHCI_RREG(AHCI_CAP); - num_cmd_slots = ((reg & AHCI_CAP_NCS) >> ctzl(AHCI_CAP_NCS)) + 1; - g_test_message("Number of Command Slots: %u", num_cmd_slots); - - /* Determine which ports are implemented. */ - ports_impl = AHCI_RREG(AHCI_PI); - - for (i = 0; ports_impl; ports_impl >>= 1, ++i) { - if (!(ports_impl & 0x01)) { - continue; - } - - g_test_message("Initializing port %u", i); - - reg = PX_RREG(i, AHCI_PX_CMD); - if (BITCLR(reg, AHCI_PX_CMD_ST | AHCI_PX_CMD_CR | - AHCI_PX_CMD_FRE | AHCI_PX_CMD_FR)) { - g_test_message("port is idle"); - } else { - g_test_message("port needs to be idled"); - PX_CLR(i, AHCI_PX_CMD, (AHCI_PX_CMD_ST | AHCI_PX_CMD_FRE)); - /* The port has 500ms to disengage. */ - usleep(500000); - reg = PX_RREG(i, AHCI_PX_CMD); - ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CR); - ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FR); - g_test_message("port is now idle"); - /* The spec does allow for possibly needing a PORT RESET - * or HBA reset if we fail to idle the port. */ - } - - /* Allocate Memory for the Command List Buffer & FIS Buffer */ - /* PxCLB space ... 0x20 per command, as in 4.2.2 p 36 */ - clb = guest_alloc(guest_malloc, num_cmd_slots * 0x20); - g_test_message("CLB: 0x%08x", clb); - PX_WREG(i, AHCI_PX_CLB, clb); - g_assert_cmphex(clb, ==, PX_RREG(i, AHCI_PX_CLB)); - - /* PxFB space ... 0x100, as in 4.2.1 p 35 */ - fb = guest_alloc(guest_malloc, 0x100); - g_test_message("FB: 0x%08x", fb); - PX_WREG(i, AHCI_PX_FB, fb); - g_assert_cmphex(fb, ==, PX_RREG(i, AHCI_PX_FB)); - - /* Clear PxSERR, PxIS, then IS.IPS[x] by writing '1's. */ - PX_WREG(i, AHCI_PX_SERR, 0xFFFFFFFF); - PX_WREG(i, AHCI_PX_IS, 0xFFFFFFFF); - AHCI_WREG(AHCI_IS, (1 << i)); - - /* Verify Interrupts Cleared */ - reg = PX_RREG(i, AHCI_PX_SERR); - g_assert_cmphex(reg, ==, 0); - - reg = PX_RREG(i, AHCI_PX_IS); - g_assert_cmphex(reg, ==, 0); - - reg = AHCI_RREG(AHCI_IS); - ASSERT_BIT_CLEAR(reg, (1 << i)); - - /* Enable All Interrupts: */ - PX_WREG(i, AHCI_PX_IE, 0xFFFFFFFF); - reg = PX_RREG(i, AHCI_PX_IE); - g_assert_cmphex(reg, ==, ~((uint32_t)AHCI_PX_IE_RESERVED)); - - /* Enable the FIS Receive Engine. */ - PX_SET(i, AHCI_PX_CMD, AHCI_PX_CMD_FRE); - reg = PX_RREG(i, AHCI_PX_CMD); - ASSERT_BIT_SET(reg, AHCI_PX_CMD_FR); - - /* AHCI 1.3 spec: if !STS.BSY, !STS.DRQ and PxSSTS.DET indicates - * physical presence, a device is present and may be started. However, - * PxSERR.DIAG.X /may/ need to be cleared a priori. */ - reg = PX_RREG(i, AHCI_PX_SERR); - if (BITSET(reg, AHCI_PX_SERR_DIAG_X)) { - PX_SET(i, AHCI_PX_SERR, AHCI_PX_SERR_DIAG_X); - } - - reg = PX_RREG(i, AHCI_PX_TFD); - if (BITCLR(reg, AHCI_PX_TFD_STS_BSY | AHCI_PX_TFD_STS_DRQ)) { - reg = PX_RREG(i, AHCI_PX_SSTS); - if ((reg & AHCI_PX_SSTS_DET) == SSTS_DET_ESTABLISHED) { - /* Device Found: set PxCMD.ST := 1 */ - PX_SET(i, AHCI_PX_CMD, AHCI_PX_CMD_ST); - ASSERT_BIT_SET(PX_RREG(i, AHCI_PX_CMD), AHCI_PX_CMD_CR); - g_test_message("Started Device %u", i); - } else if ((reg & AHCI_PX_SSTS_DET)) { - /* Device present, but in some unknown state. */ - g_assert_not_reached(); - } - } - } + QOSState *qs = ahci->parent; - /* Enable GHC.IE */ - AHCI_SET(AHCI_GHC, AHCI_GHC_IE); - reg = AHCI_RREG(AHCI_GHC); - ASSERT_BIT_SET(reg, AHCI_GHC_IE); - - /* TODO: The device should now be idling and waiting for commands. - * In the future, a small test-case to inspect the Register D2H FIS - * and clear the initial interrupts might be good. */ + ahci_clean_mem(ahci); + free_ahci_device(ahci->dev); + g_free(ahci); + qtest_shutdown(qs); } /*** Specification Adherence Tests ***/ @@ -685,14 +112,14 @@ static void ahci_hba_enable(QPCIDevice *ahci, void *hba_base) /** * Implementation for test_pci_spec. Ensures PCI configuration space is sane. */ -static void ahci_test_pci_spec(QPCIDevice *ahci) +static void ahci_test_pci_spec(AHCIQState *ahci) { uint8_t datab; uint16_t data; uint32_t datal; /* Most of these bits should start cleared until we turn them on. */ - data = qpci_config_readw(ahci, PCI_COMMAND); + data = qpci_config_readw(ahci->dev, PCI_COMMAND); ASSERT_BIT_CLEAR(data, PCI_COMMAND_MEMORY); ASSERT_BIT_CLEAR(data, PCI_COMMAND_MASTER); ASSERT_BIT_CLEAR(data, PCI_COMMAND_SPECIAL); /* Reserved */ @@ -704,7 +131,7 @@ static void ahci_test_pci_spec(QPCIDevice *ahci) ASSERT_BIT_CLEAR(data, PCI_COMMAND_INTX_DISABLE); ASSERT_BIT_CLEAR(data, 0xF800); /* Reserved */ - data = qpci_config_readw(ahci, PCI_STATUS); + data = qpci_config_readw(ahci->dev, PCI_STATUS); ASSERT_BIT_CLEAR(data, 0x01 | 0x02 | 0x04); /* Reserved */ ASSERT_BIT_CLEAR(data, PCI_STATUS_INTERRUPT); ASSERT_BIT_SET(data, PCI_STATUS_CAP_LIST); /* must be set */ @@ -717,7 +144,7 @@ static void ahci_test_pci_spec(QPCIDevice *ahci) ASSERT_BIT_CLEAR(data, PCI_STATUS_DETECTED_PARITY); /* RID occupies the low byte, CCs occupy the high three. */ - datal = qpci_config_readl(ahci, PCI_CLASS_REVISION); + datal = qpci_config_readl(ahci->dev, PCI_CLASS_REVISION); if (ahci_pedantic) { /* AHCI 1.3 specifies that at-boot, the RID should reset to 0x00, * Though in practice this is likely seldom true. */ @@ -740,40 +167,40 @@ static void ahci_test_pci_spec(QPCIDevice *ahci) g_assert_not_reached(); } - datab = qpci_config_readb(ahci, PCI_CACHE_LINE_SIZE); + datab = qpci_config_readb(ahci->dev, PCI_CACHE_LINE_SIZE); g_assert_cmphex(datab, ==, 0); - datab = qpci_config_readb(ahci, PCI_LATENCY_TIMER); + datab = qpci_config_readb(ahci->dev, PCI_LATENCY_TIMER); g_assert_cmphex(datab, ==, 0); /* Only the bottom 7 bits must be off. */ - datab = qpci_config_readb(ahci, PCI_HEADER_TYPE); + datab = qpci_config_readb(ahci->dev, PCI_HEADER_TYPE); ASSERT_BIT_CLEAR(datab, 0x7F); /* BIST is optional, but the low 7 bits must always start off regardless. */ - datab = qpci_config_readb(ahci, PCI_BIST); + datab = qpci_config_readb(ahci->dev, PCI_BIST); ASSERT_BIT_CLEAR(datab, 0x7F); /* BARS 0-4 do not have a boot spec, but ABAR/BAR5 must be clean. */ - datal = qpci_config_readl(ahci, PCI_BASE_ADDRESS_5); + datal = qpci_config_readl(ahci->dev, PCI_BASE_ADDRESS_5); g_assert_cmphex(datal, ==, 0); - qpci_config_writel(ahci, PCI_BASE_ADDRESS_5, 0xFFFFFFFF); - datal = qpci_config_readl(ahci, PCI_BASE_ADDRESS_5); + qpci_config_writel(ahci->dev, PCI_BASE_ADDRESS_5, 0xFFFFFFFF); + datal = qpci_config_readl(ahci->dev, PCI_BASE_ADDRESS_5); /* ABAR must be 32-bit, memory mapped, non-prefetchable and * must be >= 512 bytes. To that end, bits 0-8 must be off. */ ASSERT_BIT_CLEAR(datal, 0xFF); /* Capability list MUST be present, */ - datal = qpci_config_readl(ahci, PCI_CAPABILITY_LIST); + datal = qpci_config_readl(ahci->dev, PCI_CAPABILITY_LIST); /* But these bits are reserved. */ ASSERT_BIT_CLEAR(datal, ~0xFF); g_assert_cmphex(datal, !=, 0); /* Check specification adherence for capability extenstions. */ - data = qpci_config_readw(ahci, datal); + data = qpci_config_readw(ahci->dev, datal); - switch (ahci_fingerprint) { + switch (ahci->fingerprint) { case AHCI_INTEL_ICH9: /* Intel ICH9 Family Datasheet 14.1.19 p.550 */ g_assert_cmphex((data & 0xFF), ==, PCI_CAP_ID_MSI); @@ -786,18 +213,18 @@ static void ahci_test_pci_spec(QPCIDevice *ahci) ahci_test_pci_caps(ahci, data, (uint8_t)datal); /* Reserved. */ - datal = qpci_config_readl(ahci, PCI_CAPABILITY_LIST + 4); + datal = qpci_config_readl(ahci->dev, PCI_CAPABILITY_LIST + 4); g_assert_cmphex(datal, ==, 0); /* IPIN might vary, but ILINE must be off. */ - datab = qpci_config_readb(ahci, PCI_INTERRUPT_LINE); + datab = qpci_config_readb(ahci->dev, PCI_INTERRUPT_LINE); g_assert_cmphex(datab, ==, 0); } /** * Test PCI capabilities for AHCI specification adherence. */ -static void ahci_test_pci_caps(QPCIDevice *ahci, uint16_t header, +static void ahci_test_pci_caps(AHCIQState *ahci, uint16_t header, uint8_t offset) { uint8_t cid = header & 0xFF; @@ -821,14 +248,14 @@ static void ahci_test_pci_caps(QPCIDevice *ahci, uint16_t header, } if (next) { - ahci_test_pci_caps(ahci, qpci_config_readw(ahci, next), next); + ahci_test_pci_caps(ahci, qpci_config_readw(ahci->dev, next), next); } } /** * Test SATA PCI capabilitity for AHCI specification adherence. */ -static void ahci_test_satacap(QPCIDevice *ahci, uint8_t offset) +static void ahci_test_satacap(AHCIQState *ahci, uint8_t offset) { uint16_t dataw; uint32_t datal; @@ -836,11 +263,11 @@ static void ahci_test_satacap(QPCIDevice *ahci, uint8_t offset) g_test_message("Verifying SATACAP"); /* Assert that the SATACAP version is 1.0, And reserved bits are empty. */ - dataw = qpci_config_readw(ahci, offset + 2); + dataw = qpci_config_readw(ahci->dev, offset + 2); g_assert_cmphex(dataw, ==, 0x10); /* Grab the SATACR1 register. */ - datal = qpci_config_readw(ahci, offset + 4); + datal = qpci_config_readw(ahci->dev, offset + 4); switch (datal & 0x0F) { case 0x04: /* BAR0 */ @@ -863,30 +290,30 @@ static void ahci_test_satacap(QPCIDevice *ahci, uint8_t offset) /** * Test MSI PCI capability for AHCI specification adherence. */ -static void ahci_test_msicap(QPCIDevice *ahci, uint8_t offset) +static void ahci_test_msicap(AHCIQState *ahci, uint8_t offset) { uint16_t dataw; uint32_t datal; g_test_message("Verifying MSICAP"); - dataw = qpci_config_readw(ahci, offset + PCI_MSI_FLAGS); + dataw = qpci_config_readw(ahci->dev, offset + PCI_MSI_FLAGS); ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_ENABLE); ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_QSIZE); ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_RESERVED); - datal = qpci_config_readl(ahci, offset + PCI_MSI_ADDRESS_LO); + datal = qpci_config_readl(ahci->dev, offset + PCI_MSI_ADDRESS_LO); g_assert_cmphex(datal, ==, 0); if (dataw & PCI_MSI_FLAGS_64BIT) { g_test_message("MSICAP is 64bit"); - datal = qpci_config_readl(ahci, offset + PCI_MSI_ADDRESS_HI); + datal = qpci_config_readl(ahci->dev, offset + PCI_MSI_ADDRESS_HI); g_assert_cmphex(datal, ==, 0); - dataw = qpci_config_readw(ahci, offset + PCI_MSI_DATA_64); + dataw = qpci_config_readw(ahci->dev, offset + PCI_MSI_DATA_64); g_assert_cmphex(dataw, ==, 0); } else { g_test_message("MSICAP is 32bit"); - dataw = qpci_config_readw(ahci, offset + PCI_MSI_DATA_32); + dataw = qpci_config_readw(ahci->dev, offset + PCI_MSI_DATA_32); g_assert_cmphex(dataw, ==, 0); } } @@ -894,36 +321,34 @@ static void ahci_test_msicap(QPCIDevice *ahci, uint8_t offset) /** * Test Power Management PCI capability for AHCI specification adherence. */ -static void ahci_test_pmcap(QPCIDevice *ahci, uint8_t offset) +static void ahci_test_pmcap(AHCIQState *ahci, uint8_t offset) { uint16_t dataw; g_test_message("Verifying PMCAP"); - dataw = qpci_config_readw(ahci, offset + PCI_PM_PMC); + dataw = qpci_config_readw(ahci->dev, offset + PCI_PM_PMC); ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_PME_CLOCK); ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_RESERVED); ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_D1); ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_D2); - dataw = qpci_config_readw(ahci, offset + PCI_PM_CTRL); + dataw = qpci_config_readw(ahci->dev, offset + PCI_PM_CTRL); ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_STATE_MASK); ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_RESERVED); ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_DATA_SEL_MASK); ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_DATA_SCALE_MASK); } -static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base) +static void ahci_test_hba_spec(AHCIQState *ahci) { - HBACap hcap; unsigned i; - uint32_t cap, cap2, reg; + uint32_t reg; uint32_t ports; uint8_t nports_impl; uint8_t maxports; - g_assert(ahci != 0); - g_assert(hba_base != 0); + g_assert(ahci != NULL); /* * Note that the AHCI spec does expect the BIOS to set up a few things: @@ -946,15 +371,15 @@ static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base) */ /* 1 CAP - Capabilities Register */ - cap = AHCI_RREG(AHCI_CAP); - ASSERT_BIT_CLEAR(cap, AHCI_CAP_RESERVED); + ahci->cap = ahci_rreg(ahci, AHCI_CAP); + ASSERT_BIT_CLEAR(ahci->cap, AHCI_CAP_RESERVED); /* 2 GHC - Global Host Control */ - reg = AHCI_RREG(AHCI_GHC); + reg = ahci_rreg(ahci, AHCI_GHC); ASSERT_BIT_CLEAR(reg, AHCI_GHC_HR); ASSERT_BIT_CLEAR(reg, AHCI_GHC_IE); ASSERT_BIT_CLEAR(reg, AHCI_GHC_MRSM); - if (BITSET(cap, AHCI_CAP_SAM)) { + if (BITSET(ahci->cap, AHCI_CAP_SAM)) { g_test_message("Supports AHCI-Only Mode: GHC_AE is Read-Only."); ASSERT_BIT_SET(reg, AHCI_GHC_AE); } else { @@ -963,27 +388,27 @@ static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base) } /* 3 IS - Interrupt Status */ - reg = AHCI_RREG(AHCI_IS); + reg = ahci_rreg(ahci, AHCI_IS); g_assert_cmphex(reg, ==, 0); /* 4 PI - Ports Implemented */ - ports = AHCI_RREG(AHCI_PI); + ports = ahci_rreg(ahci, AHCI_PI); /* Ports Implemented must be non-zero. */ g_assert_cmphex(ports, !=, 0); /* Ports Implemented must be <= Number of Ports. */ nports_impl = ctpopl(ports); - g_assert_cmpuint(((AHCI_CAP_NP & cap) + 1), >=, nports_impl); + g_assert_cmpuint(((AHCI_CAP_NP & ahci->cap) + 1), >=, nports_impl); - g_assert_cmphex(barsize, >, 0); /* Ports must be within the proper range. Given a mapping of SIZE, * 256 bytes are used for global HBA control, and the rest is used * for ports data, at 0x80 bytes each. */ - maxports = (barsize - HBA_DATA_REGION_SIZE) / HBA_PORT_DATA_SIZE; + g_assert_cmphex(ahci->barsize, >, 0); + maxports = (ahci->barsize - HBA_DATA_REGION_SIZE) / HBA_PORT_DATA_SIZE; /* e.g, 30 ports for 4K of memory. (4096 - 256) / 128 = 30 */ g_assert_cmphex((reg >> maxports), ==, 0); /* 5 AHCI Version */ - reg = AHCI_RREG(AHCI_VS); + reg = ahci_rreg(ahci, AHCI_VS); switch (reg) { case AHCI_VERSION_0_95: case AHCI_VERSION_1_0: @@ -996,8 +421,8 @@ static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base) } /* 6 Command Completion Coalescing Control: depends on CAP.CCCS. */ - reg = AHCI_RREG(AHCI_CCCCTL); - if (BITSET(cap, AHCI_CAP_CCCS)) { + reg = ahci_rreg(ahci, AHCI_CCCCTL); + if (BITSET(ahci->cap, AHCI_CAP_CCCS)) { ASSERT_BIT_CLEAR(reg, AHCI_CCCCTL_EN); ASSERT_BIT_CLEAR(reg, AHCI_CCCCTL_RESERVED); ASSERT_BIT_SET(reg, AHCI_CCCCTL_CC); @@ -1007,19 +432,19 @@ static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base) } /* 7 CCC_PORTS */ - reg = AHCI_RREG(AHCI_CCCPORTS); + reg = ahci_rreg(ahci, AHCI_CCCPORTS); /* Must be zeroes initially regardless of CAP.CCCS */ g_assert_cmphex(reg, ==, 0); /* 8 EM_LOC */ - reg = AHCI_RREG(AHCI_EMLOC); - if (BITCLR(cap, AHCI_CAP_EMS)) { + reg = ahci_rreg(ahci, AHCI_EMLOC); + if (BITCLR(ahci->cap, AHCI_CAP_EMS)) { g_assert_cmphex(reg, ==, 0); } /* 9 EM_CTL */ - reg = AHCI_RREG(AHCI_EMCTL); - if (BITSET(cap, AHCI_CAP_EMS)) { + reg = ahci_rreg(ahci, AHCI_EMCTL); + if (BITSET(ahci->cap, AHCI_CAP_EMS)) { ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_STSMR); ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_CTLTM); ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_CTLRST); @@ -1029,25 +454,25 @@ static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base) } /* 10 CAP2 -- Capabilities Extended */ - cap2 = AHCI_RREG(AHCI_CAP2); - ASSERT_BIT_CLEAR(cap2, AHCI_CAP2_RESERVED); + ahci->cap2 = ahci_rreg(ahci, AHCI_CAP2); + ASSERT_BIT_CLEAR(ahci->cap2, AHCI_CAP2_RESERVED); /* 11 BOHC -- Bios/OS Handoff Control */ - reg = AHCI_RREG(AHCI_BOHC); + reg = ahci_rreg(ahci, AHCI_BOHC); g_assert_cmphex(reg, ==, 0); /* 12 -- 23: Reserved */ g_test_message("Verifying HBA reserved area is empty."); for (i = AHCI_RESERVED; i < AHCI_NVMHCI; ++i) { - reg = AHCI_RREG(i); + reg = ahci_rreg(ahci, i); g_assert_cmphex(reg, ==, 0); } /* 24 -- 39: NVMHCI */ - if (BITCLR(cap2, AHCI_CAP2_NVMP)) { + if (BITCLR(ahci->cap2, AHCI_CAP2_NVMP)) { g_test_message("Verifying HBA/NVMHCI area is empty."); for (i = AHCI_NVMHCI; i < AHCI_VENDOR; ++i) { - reg = AHCI_RREG(i); + reg = ahci_rreg(ahci, i); g_assert_cmphex(reg, ==, 0); } } @@ -1055,17 +480,15 @@ static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base) /* 40 -- 63: Vendor */ g_test_message("Verifying HBA/Vendor area is empty."); for (i = AHCI_VENDOR; i < AHCI_PORTS; ++i) { - reg = AHCI_RREG(i); + reg = ahci_rreg(ahci, i); g_assert_cmphex(reg, ==, 0); } /* 64 -- XX: Port Space */ - hcap.cap = cap; - hcap.cap2 = cap2; for (i = 0; ports || (i < maxports); ports >>= 1, ++i) { if (BITSET(ports, 0x1)) { g_test_message("Testing port %u for spec", i); - ahci_test_port_spec(ahci, hba_base, &hcap, i); + ahci_test_port_spec(ahci, i); } else { uint16_t j; uint16_t low = AHCI_PORTS + (32 * i); @@ -1074,7 +497,7 @@ static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base) "(reg [%u-%u]) is empty.", i, low, high - 1); for (j = low; j < high; ++j) { - reg = AHCI_RREG(j); + reg = ahci_rreg(ahci, j); g_assert_cmphex(reg, ==, 0); } } @@ -1084,42 +507,41 @@ static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base) /** * Test the memory space for one port for specification adherence. */ -static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base, - HBACap *hcap, uint8_t port) +static void ahci_test_port_spec(AHCIQState *ahci, uint8_t port) { uint32_t reg; unsigned i; /* (0) CLB */ - reg = PX_RREG(port, AHCI_PX_CLB); + reg = ahci_px_rreg(ahci, port, AHCI_PX_CLB); ASSERT_BIT_CLEAR(reg, AHCI_PX_CLB_RESERVED); /* (1) CLBU */ - if (BITCLR(hcap->cap, AHCI_CAP_S64A)) { - reg = PX_RREG(port, AHCI_PX_CLBU); + if (BITCLR(ahci->cap, AHCI_CAP_S64A)) { + reg = ahci_px_rreg(ahci, port, AHCI_PX_CLBU); g_assert_cmphex(reg, ==, 0); } /* (2) FB */ - reg = PX_RREG(port, AHCI_PX_FB); + reg = ahci_px_rreg(ahci, port, AHCI_PX_FB); ASSERT_BIT_CLEAR(reg, AHCI_PX_FB_RESERVED); /* (3) FBU */ - if (BITCLR(hcap->cap, AHCI_CAP_S64A)) { - reg = PX_RREG(port, AHCI_PX_FBU); + if (BITCLR(ahci->cap, AHCI_CAP_S64A)) { + reg = ahci_px_rreg(ahci, port, AHCI_PX_FBU); g_assert_cmphex(reg, ==, 0); } /* (4) IS */ - reg = PX_RREG(port, AHCI_PX_IS); + reg = ahci_px_rreg(ahci, port, AHCI_PX_IS); g_assert_cmphex(reg, ==, 0); /* (5) IE */ - reg = PX_RREG(port, AHCI_PX_IE); + reg = ahci_px_rreg(ahci, port, AHCI_PX_IE); g_assert_cmphex(reg, ==, 0); /* (6) CMD */ - reg = PX_RREG(port, AHCI_PX_CMD); + reg = ahci_px_rreg(ahci, port, AHCI_PX_CMD); ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FRE); ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_RESERVED); ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CCS); @@ -1141,7 +563,7 @@ static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base, ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSS); } /* If we do not support MPS, MPSS and MPSP must be off. */ - if (BITCLR(hcap->cap, AHCI_CAP_SMPS)) { + if (BITCLR(ahci->cap, AHCI_CAP_SMPS)) { ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSS); ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSP); } @@ -1152,16 +574,16 @@ static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base, /* HPCP and ESP cannot both be active. */ g_assert(!BITSET(reg, AHCI_PX_CMD_HPCP | AHCI_PX_CMD_ESP)); /* If CAP.FBSS is not set, FBSCP must not be set. */ - if (BITCLR(hcap->cap, AHCI_CAP_FBSS)) { + if (BITCLR(ahci->cap, AHCI_CAP_FBSS)) { ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FBSCP); } /* (7) RESERVED */ - reg = PX_RREG(port, AHCI_PX_RES1); + reg = ahci_px_rreg(ahci, port, AHCI_PX_RES1); g_assert_cmphex(reg, ==, 0); /* (8) TFD */ - reg = PX_RREG(port, AHCI_PX_TFD); + reg = ahci_px_rreg(ahci, port, AHCI_PX_TFD); /* At boot, prior to an FIS being received, the TFD register should be 0x7F, * which breaks down as follows, as seen in AHCI 1.3 sec 3.3.8, p. 27. */ ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_ERR); @@ -1179,53 +601,53 @@ static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base, * so we cannot expect a value here. AHCI 1.3, sec 3.3.9, pp 27-28 */ /* (10) SSTS / SCR0: SStatus */ - reg = PX_RREG(port, AHCI_PX_SSTS); + reg = ahci_px_rreg(ahci, port, AHCI_PX_SSTS); ASSERT_BIT_CLEAR(reg, AHCI_PX_SSTS_RESERVED); /* Even though the register should be 0 at boot, it is asynchronous and * prone to change, so we cannot test any well known value. */ /* (11) SCTL / SCR2: SControl */ - reg = PX_RREG(port, AHCI_PX_SCTL); + reg = ahci_px_rreg(ahci, port, AHCI_PX_SCTL); g_assert_cmphex(reg, ==, 0); /* (12) SERR / SCR1: SError */ - reg = PX_RREG(port, AHCI_PX_SERR); + reg = ahci_px_rreg(ahci, port, AHCI_PX_SERR); g_assert_cmphex(reg, ==, 0); /* (13) SACT / SCR3: SActive */ - reg = PX_RREG(port, AHCI_PX_SACT); + reg = ahci_px_rreg(ahci, port, AHCI_PX_SACT); g_assert_cmphex(reg, ==, 0); /* (14) CI */ - reg = PX_RREG(port, AHCI_PX_CI); + reg = ahci_px_rreg(ahci, port, AHCI_PX_CI); g_assert_cmphex(reg, ==, 0); /* (15) SNTF */ - reg = PX_RREG(port, AHCI_PX_SNTF); + reg = ahci_px_rreg(ahci, port, AHCI_PX_SNTF); g_assert_cmphex(reg, ==, 0); /* (16) FBS */ - reg = PX_RREG(port, AHCI_PX_FBS); + reg = ahci_px_rreg(ahci, port, AHCI_PX_FBS); ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_EN); ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DEC); ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_SDE); ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DEV); ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DWE); ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_RESERVED); - if (BITSET(hcap->cap, AHCI_CAP_FBSS)) { + if (BITSET(ahci->cap, AHCI_CAP_FBSS)) { /* if Port-Multiplier FIS-based switching avail, ADO must >= 2 */ g_assert((reg & AHCI_PX_FBS_ADO) >> ctzl(AHCI_PX_FBS_ADO) >= 2); } /* [17 -- 27] RESERVED */ for (i = AHCI_PX_RES2; i < AHCI_PX_VS; ++i) { - reg = PX_RREG(port, i); + reg = ahci_px_rreg(ahci, port, i); g_assert_cmphex(reg, ==, 0); } /* [28 -- 31] Vendor-Specific */ for (i = AHCI_PX_VS; i < 32; ++i) { - reg = PX_RREG(port, i); + reg = ahci_px_rreg(ahci, port, i); if (reg) { g_test_message("INFO: Vendor register %u non-empty", i); } @@ -1236,164 +658,46 @@ static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base, * Utilizing an initialized AHCI HBA, issue an IDENTIFY command to the first * device we see, then read and check the response. */ -static void ahci_test_identify(QPCIDevice *ahci, void *hba_base) +static void ahci_test_identify(AHCIQState *ahci) { - RegD2HFIS *d2h = g_malloc0(0x20); - RegD2HFIS *pio = g_malloc0(0x20); - RegH2DFIS fis; - AHCICommand cmd; - PRD prd; - uint32_t ports, reg, clb, table, fb, data_ptr; uint16_t buff[256]; - unsigned i; + unsigned px; int rc; + uint16_t sect_size; + const size_t buffsize = 512; g_assert(ahci != NULL); - g_assert(hba_base != NULL); - - /* We need to: - * (1) Create a Command Table Buffer and update the Command List Slot #0 - * to point to this buffer. - * (2) Construct an FIS host-to-device command structure, and write it to - * the top of the command table buffer. - * (3) Create a data buffer for the IDENTIFY response to be sent to - * (4) Create a Physical Region Descriptor that points to the data buffer, - * and write it to the bottom (offset 0x80) of the command table. - * (5) Now, PxCLB points to the command list, command 0 points to - * our table, and our table contains an FIS instruction and a - * PRD that points to our rx buffer. - * (6) We inform the HBA via PxCI that there is a command ready in slot #0. + + /** + * This serves as a bit of a tutorial on AHCI device programming: + * + * (1) Create a data buffer for the IDENTIFY response to be sent to + * (2) Create a Command Table buffer, where we will store the + * command and PRDT (Physical Region Descriptor Table) + * (3) Construct an FIS host-to-device command structure, and write it to + * the top of the Command Table buffer. + * (4) Create one or more Physical Region Descriptors (PRDs) that describe + * a location in memory where data may be stored/retrieved. + * (5) Write these PRDTs to the bottom (offset 0x80) of the Command Table. + * (6) Each AHCI port has up to 32 command slots. Each slot contains a + * header that points to a Command Table buffer. Pick an unused slot + * and update it to point to the Command Table we have built. + * (7) Now: Command #n points to our Command Table, and our Command Table + * contains the FIS (that describes our command) and the PRDTL, which + * describes our buffer. + * (8) We inform the HBA via PxCI (Command Issue) that the command in slot + * #n is ready for processing. */ /* Pick the first implemented and running port */ - ports = AHCI_RREG(AHCI_PI); - for (i = 0; i < 32; ports >>= 1, ++i) { - if (ports == 0) { - i = 32; - } - - if (!(ports & 0x01)) { - continue; - } - - reg = PX_RREG(i, AHCI_PX_CMD); - if (BITSET(reg, AHCI_PX_CMD_ST)) { - break; - } - } - g_assert_cmphex(i, <, 32); - g_test_message("Selected port %u for test", i); - - /* Clear out this port's interrupts (ignore the init register d2h fis) */ - reg = PX_RREG(i, AHCI_PX_IS); - PX_WREG(i, AHCI_PX_IS, reg); - g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0); - - /* Wipe the FIS-Receive Buffer */ - fb = PX_RREG(i, AHCI_PX_FB); - g_assert_cmphex(fb, !=, 0); - qmemset(fb, 0x00, 0x100); - - /* Create a Command Table buffer. 0x80 is the smallest with a PRDTL of 0. */ - /* We need at least one PRD, so round up to the nearest 0x80 multiple. */ - table = guest_alloc(guest_malloc, CMD_TBL_SIZ(1)); - g_assert(table); - ASSERT_BIT_CLEAR(table, 0x7F); - - /* Create a data buffer ... where we will dump the IDENTIFY data to. */ - data_ptr = guest_alloc(guest_malloc, 512); - g_assert(data_ptr); - - /* Grab the Command List Buffer pointer */ - clb = PX_RREG(i, AHCI_PX_CLB); - g_assert(clb); - - /* Copy the existing Command #0 structure from the CLB into local memory, - * and build a new command #0. */ - memread(clb, &cmd, sizeof(cmd)); - cmd.b1 = 5; /* reg_h2d_fis is 5 double-words long */ - cmd.b2 = 0x04; /* clear PxTFD.STS.BSY when done */ - cmd.prdtl = cpu_to_le16(1); /* One PRD table entry. */ - cmd.prdbc = 0; - cmd.ctba = cpu_to_le32(table); - cmd.ctbau = 0; - - /* Construct our PRD, noting that DBC is 0-indexed. */ - prd.dba = cpu_to_le32(data_ptr); - prd.dbau = 0; - prd.res = 0; - /* 511+1 bytes, request DPS interrupt */ - prd.dbc = cpu_to_le32(511 | 0x80000000); - - /* Construct our Command FIS, Based on http://wiki.osdev.org/AHCI */ - memset(&fis, 0x00, sizeof(fis)); - fis.fis_type = 0x27; /* Register Host-to-Device FIS */ - fis.command = 0xEC; /* IDENTIFY */ - fis.device = 0; - fis.flags = 0x80; /* Indicate this is a command FIS */ - - /* We've committed nothing yet, no interrupts should be posted yet. */ - g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0); - - /* Commit the Command FIS to the Command Table */ - memwrite(table, &fis, sizeof(fis)); - - /* Commit the PRD entry to the Command Table */ - memwrite(table + 0x80, &prd, sizeof(prd)); - - /* Commit Command #0, pointing to the Table, to the Command List Buffer. */ - memwrite(clb, &cmd, sizeof(cmd)); - - /* Everything is in place, but we haven't given the go-ahead yet. */ - g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0); - - /* Issue Command #0 via PxCI */ - PX_WREG(i, AHCI_PX_CI, (1 << 0)); - while (BITSET(PX_RREG(i, AHCI_PX_TFD), AHCI_PX_TFD_STS_BSY)) { - usleep(50); - } + px = ahci_port_select(ahci); + g_test_message("Selected port %u for test", px); - /* Check for expected interrupts */ - reg = PX_RREG(i, AHCI_PX_IS); - ASSERT_BIT_SET(reg, AHCI_PX_IS_DHRS); - ASSERT_BIT_SET(reg, AHCI_PX_IS_PSS); - /* BUG: we expect AHCI_PX_IS_DPS to be set. */ - ASSERT_BIT_CLEAR(reg, AHCI_PX_IS_DPS); + /* Clear out the FIS Receive area and any pending interrupts. */ + ahci_port_clear(ahci, px); - /* Clear expected interrupts and assert all interrupts now cleared. */ - PX_WREG(i, AHCI_PX_IS, AHCI_PX_IS_DHRS | AHCI_PX_IS_PSS | AHCI_PX_IS_DPS); - g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0); - - /* Check for errors. */ - reg = PX_RREG(i, AHCI_PX_SERR); - g_assert_cmphex(reg, ==, 0); - reg = PX_RREG(i, AHCI_PX_TFD); - ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_ERR); - ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR); - - /* Investigate CMD #0, assert that we read 512 bytes */ - memread(clb, &cmd, sizeof(cmd)); - g_assert_cmphex(512, ==, le32_to_cpu(cmd.prdbc)); - - /* Investigate FIS responses */ - memread(fb + 0x20, pio, 0x20); - memread(fb + 0x40, d2h, 0x20); - g_assert_cmphex(pio->fis_type, ==, 0x5f); - g_assert_cmphex(d2h->fis_type, ==, 0x34); - g_assert_cmphex(pio->flags, ==, d2h->flags); - g_assert_cmphex(pio->status, ==, d2h->status); - g_assert_cmphex(pio->error, ==, d2h->error); - - reg = PX_RREG(i, AHCI_PX_TFD); - g_assert_cmphex((reg & AHCI_PX_TFD_ERR), ==, pio->error); - g_assert_cmphex((reg & AHCI_PX_TFD_STS), ==, pio->status); - /* The PIO Setup FIS contains a "bytes read" field, which is a - * 16-bit value. The Physical Region Descriptor Byte Count is - * 32-bit, but for small transfers using one PRD, it should match. */ - g_assert_cmphex(le16_to_cpu(pio->res4), ==, le32_to_cpu(cmd.prdbc)); - - /* Last, but not least: Investigate the IDENTIFY response data. */ - memread(data_ptr, &buff, 512); + /* "Read" 512 bytes using CMD_IDENTIFY into the host buffer. */ + ahci_io(ahci, px, CMD_IDENTIFY, &buff, buffsize); /* Check serial number/version in the buffer */ /* NB: IDENTIFY strings are packed in 16bit little endian chunks. @@ -1408,8 +712,48 @@ static void ahci_test_identify(QPCIDevice *ahci, void *hba_base) rc = memcmp(&buff[23], "version ", 8); g_assert_cmphex(rc, ==, 0); - g_free(d2h); - g_free(pio); + sect_size = le16_to_cpu(*((uint16_t *)(&buff[5]))); + g_assert_cmphex(sect_size, ==, 0x200); +} + +static void ahci_test_dma_rw_simple(AHCIQState *ahci) +{ + uint64_t ptr; + uint8_t port; + unsigned i; + const unsigned bufsize = 4096; + unsigned char *tx = g_malloc(bufsize); + unsigned char *rx = g_malloc0(bufsize); + + g_assert(ahci != NULL); + + /* Pick the first running port and clear it. */ + port = ahci_port_select(ahci); + ahci_port_clear(ahci, port); + + /*** Create pattern and transfer to guest ***/ + /* Data buffer in the guest */ + ptr = ahci_alloc(ahci, bufsize); + g_assert(ptr); + + /* Write some indicative pattern to our 4K buffer. */ + for (i = 0; i < bufsize; i++) { + tx[i] = (bufsize - i); + } + memwrite(ptr, tx, bufsize); + + /* Write this buffer to disk, then read it back to the DMA buffer. */ + ahci_guest_io(ahci, port, CMD_WRITE_DMA, ptr, bufsize); + qmemset(ptr, 0x00, bufsize); + ahci_guest_io(ahci, port, CMD_READ_DMA, ptr, bufsize); + + /*** Read back the Data ***/ + memread(ptr, rx, bufsize); + g_assert_cmphex(memcmp(tx, rx, bufsize), ==, 0); + + ahci_free(ahci, ptr); + g_free(tx); + g_free(rx); } /******************************************************************************/ @@ -1421,7 +765,7 @@ static void ahci_test_identify(QPCIDevice *ahci, void *hba_base) */ static void test_sanity(void) { - QPCIDevice *ahci; + AHCIQState *ahci; ahci = ahci_boot(); ahci_shutdown(ahci); } @@ -1432,7 +776,7 @@ static void test_sanity(void) */ static void test_pci_spec(void) { - QPCIDevice *ahci; + AHCIQState *ahci; ahci = ahci_boot(); ahci_test_pci_spec(ahci); ahci_shutdown(ahci); @@ -1444,10 +788,10 @@ static void test_pci_spec(void) */ static void test_pci_enable(void) { - QPCIDevice *ahci; - void *hba_base; + AHCIQState *ahci; + ahci = ahci_boot(); - ahci_pci_enable(ahci, &hba_base); + ahci_pci_enable(ahci); ahci_shutdown(ahci); } @@ -1457,12 +801,11 @@ static void test_pci_enable(void) */ static void test_hba_spec(void) { - QPCIDevice *ahci; - void *hba_base; + AHCIQState *ahci; ahci = ahci_boot(); - ahci_pci_enable(ahci, &hba_base); - ahci_test_hba_spec(ahci, hba_base); + ahci_pci_enable(ahci); + ahci_test_hba_spec(ahci); ahci_shutdown(ahci); } @@ -1472,12 +815,11 @@ static void test_hba_spec(void) */ static void test_hba_enable(void) { - QPCIDevice *ahci; - void *hba_base; + AHCIQState *ahci; ahci = ahci_boot(); - ahci_pci_enable(ahci, &hba_base); - ahci_hba_enable(ahci, hba_base); + ahci_pci_enable(ahci); + ahci_hba_enable(ahci); ahci_shutdown(ahci); } @@ -1487,13 +829,26 @@ static void test_hba_enable(void) */ static void test_identify(void) { - QPCIDevice *ahci; - void *hba_base; + AHCIQState *ahci; + + ahci = ahci_boot(); + ahci_pci_enable(ahci); + ahci_hba_enable(ahci); + ahci_test_identify(ahci); + ahci_shutdown(ahci); +} + +/** + * Perform a simple DMA R/W test, using a single PRD and non-NCQ commands. + */ +static void test_dma_rw_simple(void) +{ + AHCIQState *ahci; ahci = ahci_boot(); - ahci_pci_enable(ahci, &hba_base); - ahci_hba_enable(ahci, hba_base); - ahci_test_identify(ahci, hba_base); + ahci_pci_enable(ahci); + ahci_hba_enable(ahci); + ahci_test_dma_rw_simple(ahci); ahci_shutdown(ahci); } @@ -1552,6 +907,7 @@ int main(int argc, char **argv) qtest_add_func("/ahci/hba_spec", test_hba_spec); qtest_add_func("/ahci/hba_enable", test_hba_enable); qtest_add_func("/ahci/identify", test_identify); + qtest_add_func("/ahci/dma/simple", test_dma_rw_simple); ret = g_test_run(); diff --git a/tests/libqos/ahci.c b/tests/libqos/ahci.c new file mode 100644 index 0000000000..a6105c750f --- /dev/null +++ b/tests/libqos/ahci.c @@ -0,0 +1,838 @@ +/* + * libqos AHCI functions + * + * Copyright (c) 2014 John Snow <jsnow@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <glib.h> + +#include "libqtest.h" +#include "libqos/ahci.h" +#include "libqos/pci-pc.h" + +#include "qemu-common.h" +#include "qemu/host-utils.h" + +#include "hw/pci/pci_ids.h" +#include "hw/pci/pci_regs.h" + +typedef struct AHCICommandProp { + uint8_t cmd; /* Command Code */ + bool data; /* Data transfer command? */ + bool pio; + bool dma; + bool lba28; + bool lba48; + bool read; + bool write; + bool atapi; + bool ncq; + uint64_t size; /* Static transfer size, for commands like IDENTIFY. */ + uint32_t interrupts; /* Expected interrupts for this command. */ +} AHCICommandProp; + +AHCICommandProp ahci_command_properties[] = { + { .cmd = CMD_READ_PIO, .data = true, .pio = true, + .lba28 = true, .read = true }, + { .cmd = CMD_WRITE_PIO, .data = true, .pio = true, + .lba28 = true, .write = true }, + { .cmd = CMD_READ_PIO_EXT, .data = true, .pio = true, + .lba48 = true, .read = true }, + { .cmd = CMD_WRITE_PIO_EXT, .data = true, .pio = true, + .lba48 = true, .write = true }, + { .cmd = CMD_READ_DMA, .data = true, .dma = true, + .lba28 = true, .read = true }, + { .cmd = CMD_WRITE_DMA, .data = true, .dma = true, + .lba28 = true, .write = true }, + { .cmd = CMD_READ_DMA_EXT, .data = true, .dma = true, + .lba48 = true, .read = true }, + { .cmd = CMD_WRITE_DMA_EXT, .data = true, .dma = true, + .lba48 = true, .write = true }, + { .cmd = CMD_IDENTIFY, .data = true, .pio = true, + .size = 512, .read = true }, + { .cmd = CMD_READ_MAX, .lba28 = true }, + { .cmd = CMD_READ_MAX_EXT, .lba48 = true }, + { .cmd = CMD_FLUSH_CACHE, .data = false } +}; + +/** + * Allocate space in the guest using information in the AHCIQState object. + */ +uint64_t ahci_alloc(AHCIQState *ahci, size_t bytes) +{ + g_assert(ahci); + g_assert(ahci->parent); + return qmalloc(ahci->parent, bytes); +} + +void ahci_free(AHCIQState *ahci, uint64_t addr) +{ + g_assert(ahci); + g_assert(ahci->parent); + qfree(ahci->parent, addr); +} + +/** + * Locate, verify, and return a handle to the AHCI device. + */ +QPCIDevice *get_ahci_device(uint32_t *fingerprint) +{ + QPCIDevice *ahci; + uint32_t ahci_fingerprint; + QPCIBus *pcibus; + + pcibus = qpci_init_pc(); + + /* Find the AHCI PCI device and verify it's the right one. */ + ahci = qpci_device_find(pcibus, QPCI_DEVFN(0x1F, 0x02)); + g_assert(ahci != NULL); + + ahci_fingerprint = qpci_config_readl(ahci, PCI_VENDOR_ID); + + switch (ahci_fingerprint) { + case AHCI_INTEL_ICH9: + break; + default: + /* Unknown device. */ + g_assert_not_reached(); + } + + if (fingerprint) { + *fingerprint = ahci_fingerprint; + } + return ahci; +} + +void free_ahci_device(QPCIDevice *dev) +{ + QPCIBus *pcibus = dev ? dev->bus : NULL; + + /* libqos doesn't have a function for this, so free it manually */ + g_free(dev); + qpci_free_pc(pcibus); +} + +/* Free all memory in-use by the AHCI device. */ +void ahci_clean_mem(AHCIQState *ahci) +{ + uint8_t port, slot; + + for (port = 0; port < 32; ++port) { + if (ahci->port[port].fb) { + ahci_free(ahci, ahci->port[port].fb); + } + if (ahci->port[port].clb) { + for (slot = 0; slot < 32; slot++) { + ahci_destroy_command(ahci, port, slot); + } + ahci_free(ahci, ahci->port[port].clb); + } + } +} + +/*** Logical Device Initialization ***/ + +/** + * Start the PCI device and sanity-check default operation. + */ +void ahci_pci_enable(AHCIQState *ahci) +{ + uint8_t reg; + + start_ahci_device(ahci); + + switch (ahci->fingerprint) { + case AHCI_INTEL_ICH9: + /* ICH9 has a register at PCI 0x92 that + * acts as a master port enabler mask. */ + reg = qpci_config_readb(ahci->dev, 0x92); + reg |= 0x3F; + qpci_config_writeb(ahci->dev, 0x92, reg); + /* 0...0111111b -- bit significant, ports 0-5 enabled. */ + ASSERT_BIT_SET(qpci_config_readb(ahci->dev, 0x92), 0x3F); + break; + } + +} + +/** + * Map BAR5/ABAR, and engage the PCI device. + */ +void start_ahci_device(AHCIQState *ahci) +{ + /* Map AHCI's ABAR (BAR5) */ + ahci->hba_base = qpci_iomap(ahci->dev, 5, &ahci->barsize); + g_assert(ahci->hba_base); + + /* turns on pci.cmd.iose, pci.cmd.mse and pci.cmd.bme */ + qpci_device_enable(ahci->dev); +} + +/** + * Test and initialize the AHCI's HBA memory areas. + * Initialize and start any ports with devices attached. + * Bring the HBA into the idle state. + */ +void ahci_hba_enable(AHCIQState *ahci) +{ + /* Bits of interest in this section: + * GHC.AE Global Host Control / AHCI Enable + * PxCMD.ST Port Command: Start + * PxCMD.SUD "Spin Up Device" + * PxCMD.POD "Power On Device" + * PxCMD.FRE "FIS Receive Enable" + * PxCMD.FR "FIS Receive Running" + * PxCMD.CR "Command List Running" + */ + uint32_t reg, ports_impl; + uint16_t i; + uint8_t num_cmd_slots; + + g_assert(ahci != NULL); + + /* Set GHC.AE to 1 */ + ahci_set(ahci, AHCI_GHC, AHCI_GHC_AE); + reg = ahci_rreg(ahci, AHCI_GHC); + ASSERT_BIT_SET(reg, AHCI_GHC_AE); + + /* Cache CAP and CAP2. */ + ahci->cap = ahci_rreg(ahci, AHCI_CAP); + ahci->cap2 = ahci_rreg(ahci, AHCI_CAP2); + + /* Read CAP.NCS, how many command slots do we have? */ + num_cmd_slots = ((ahci->cap & AHCI_CAP_NCS) >> ctzl(AHCI_CAP_NCS)) + 1; + g_test_message("Number of Command Slots: %u", num_cmd_slots); + + /* Determine which ports are implemented. */ + ports_impl = ahci_rreg(ahci, AHCI_PI); + + for (i = 0; ports_impl; ports_impl >>= 1, ++i) { + if (!(ports_impl & 0x01)) { + continue; + } + + g_test_message("Initializing port %u", i); + + reg = ahci_px_rreg(ahci, i, AHCI_PX_CMD); + if (BITCLR(reg, AHCI_PX_CMD_ST | AHCI_PX_CMD_CR | + AHCI_PX_CMD_FRE | AHCI_PX_CMD_FR)) { + g_test_message("port is idle"); + } else { + g_test_message("port needs to be idled"); + ahci_px_clr(ahci, i, AHCI_PX_CMD, + (AHCI_PX_CMD_ST | AHCI_PX_CMD_FRE)); + /* The port has 500ms to disengage. */ + usleep(500000); + reg = ahci_px_rreg(ahci, i, AHCI_PX_CMD); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CR); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FR); + g_test_message("port is now idle"); + /* The spec does allow for possibly needing a PORT RESET + * or HBA reset if we fail to idle the port. */ + } + + /* Allocate Memory for the Command List Buffer & FIS Buffer */ + /* PxCLB space ... 0x20 per command, as in 4.2.2 p 36 */ + ahci->port[i].clb = ahci_alloc(ahci, num_cmd_slots * 0x20); + qmemset(ahci->port[i].clb, 0x00, 0x100); + g_test_message("CLB: 0x%08" PRIx64, ahci->port[i].clb); + ahci_px_wreg(ahci, i, AHCI_PX_CLB, ahci->port[i].clb); + g_assert_cmphex(ahci->port[i].clb, ==, + ahci_px_rreg(ahci, i, AHCI_PX_CLB)); + + /* PxFB space ... 0x100, as in 4.2.1 p 35 */ + ahci->port[i].fb = ahci_alloc(ahci, 0x100); + qmemset(ahci->port[i].fb, 0x00, 0x100); + g_test_message("FB: 0x%08" PRIx64, ahci->port[i].fb); + ahci_px_wreg(ahci, i, AHCI_PX_FB, ahci->port[i].fb); + g_assert_cmphex(ahci->port[i].fb, ==, + ahci_px_rreg(ahci, i, AHCI_PX_FB)); + + /* Clear PxSERR, PxIS, then IS.IPS[x] by writing '1's. */ + ahci_px_wreg(ahci, i, AHCI_PX_SERR, 0xFFFFFFFF); + ahci_px_wreg(ahci, i, AHCI_PX_IS, 0xFFFFFFFF); + ahci_wreg(ahci, AHCI_IS, (1 << i)); + + /* Verify Interrupts Cleared */ + reg = ahci_px_rreg(ahci, i, AHCI_PX_SERR); + g_assert_cmphex(reg, ==, 0); + + reg = ahci_px_rreg(ahci, i, AHCI_PX_IS); + g_assert_cmphex(reg, ==, 0); + + reg = ahci_rreg(ahci, AHCI_IS); + ASSERT_BIT_CLEAR(reg, (1 << i)); + + /* Enable All Interrupts: */ + ahci_px_wreg(ahci, i, AHCI_PX_IE, 0xFFFFFFFF); + reg = ahci_px_rreg(ahci, i, AHCI_PX_IE); + g_assert_cmphex(reg, ==, ~((uint32_t)AHCI_PX_IE_RESERVED)); + + /* Enable the FIS Receive Engine. */ + ahci_px_set(ahci, i, AHCI_PX_CMD, AHCI_PX_CMD_FRE); + reg = ahci_px_rreg(ahci, i, AHCI_PX_CMD); + ASSERT_BIT_SET(reg, AHCI_PX_CMD_FR); + + /* AHCI 1.3 spec: if !STS.BSY, !STS.DRQ and PxSSTS.DET indicates + * physical presence, a device is present and may be started. However, + * PxSERR.DIAG.X /may/ need to be cleared a priori. */ + reg = ahci_px_rreg(ahci, i, AHCI_PX_SERR); + if (BITSET(reg, AHCI_PX_SERR_DIAG_X)) { + ahci_px_set(ahci, i, AHCI_PX_SERR, AHCI_PX_SERR_DIAG_X); + } + + reg = ahci_px_rreg(ahci, i, AHCI_PX_TFD); + if (BITCLR(reg, AHCI_PX_TFD_STS_BSY | AHCI_PX_TFD_STS_DRQ)) { + reg = ahci_px_rreg(ahci, i, AHCI_PX_SSTS); + if ((reg & AHCI_PX_SSTS_DET) == SSTS_DET_ESTABLISHED) { + /* Device Found: set PxCMD.ST := 1 */ + ahci_px_set(ahci, i, AHCI_PX_CMD, AHCI_PX_CMD_ST); + ASSERT_BIT_SET(ahci_px_rreg(ahci, i, AHCI_PX_CMD), + AHCI_PX_CMD_CR); + g_test_message("Started Device %u", i); + } else if ((reg & AHCI_PX_SSTS_DET)) { + /* Device present, but in some unknown state. */ + g_assert_not_reached(); + } + } + } + + /* Enable GHC.IE */ + ahci_set(ahci, AHCI_GHC, AHCI_GHC_IE); + reg = ahci_rreg(ahci, AHCI_GHC); + ASSERT_BIT_SET(reg, AHCI_GHC_IE); + + /* TODO: The device should now be idling and waiting for commands. + * In the future, a small test-case to inspect the Register D2H FIS + * and clear the initial interrupts might be good. */ +} + +/** + * Pick the first implemented and running port + */ +unsigned ahci_port_select(AHCIQState *ahci) +{ + uint32_t ports, reg; + unsigned i; + + ports = ahci_rreg(ahci, AHCI_PI); + for (i = 0; i < 32; ports >>= 1, ++i) { + if (ports == 0) { + i = 32; + } + + if (!(ports & 0x01)) { + continue; + } + + reg = ahci_px_rreg(ahci, i, AHCI_PX_CMD); + if (BITSET(reg, AHCI_PX_CMD_ST)) { + break; + } + } + g_assert(i < 32); + return i; +} + +/** + * Clear a port's interrupts and status information prior to a test. + */ +void ahci_port_clear(AHCIQState *ahci, uint8_t port) +{ + uint32_t reg; + + /* Clear out this port's interrupts (ignore the init register d2h fis) */ + reg = ahci_px_rreg(ahci, port, AHCI_PX_IS); + ahci_px_wreg(ahci, port, AHCI_PX_IS, reg); + g_assert_cmphex(ahci_px_rreg(ahci, port, AHCI_PX_IS), ==, 0); + + /* Wipe the FIS-Recieve Buffer */ + qmemset(ahci->port[port].fb, 0x00, 0x100); +} + +/** + * Check a port for errors. + */ +void ahci_port_check_error(AHCIQState *ahci, uint8_t port) +{ + uint32_t reg; + + /* The upper 9 bits of the IS register all indicate errors. */ + reg = ahci_px_rreg(ahci, port, AHCI_PX_IS); + reg >>= 23; + g_assert_cmphex(reg, ==, 0); + + /* The Sata Error Register should be empty. */ + reg = ahci_px_rreg(ahci, port, AHCI_PX_SERR); + g_assert_cmphex(reg, ==, 0); + + /* The TFD also has two error sections. */ + reg = ahci_px_rreg(ahci, port, AHCI_PX_TFD); + ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_ERR); + ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR); +} + +void ahci_port_check_interrupts(AHCIQState *ahci, uint8_t port, + uint32_t intr_mask) +{ + uint32_t reg; + + /* Check for expected interrupts */ + reg = ahci_px_rreg(ahci, port, AHCI_PX_IS); + ASSERT_BIT_SET(reg, intr_mask); + + /* Clear expected interrupts and assert all interrupts now cleared. */ + ahci_px_wreg(ahci, port, AHCI_PX_IS, intr_mask); + g_assert_cmphex(ahci_px_rreg(ahci, port, AHCI_PX_IS), ==, 0); +} + +void ahci_port_check_nonbusy(AHCIQState *ahci, uint8_t port, uint8_t slot) +{ + uint32_t reg; + + /* Assert that the command slot is no longer busy (NCQ) */ + reg = ahci_px_rreg(ahci, port, AHCI_PX_SACT); + ASSERT_BIT_CLEAR(reg, (1 << slot)); + + /* Non-NCQ */ + reg = ahci_px_rreg(ahci, port, AHCI_PX_CI); + ASSERT_BIT_CLEAR(reg, (1 << slot)); + + /* And assert that we are generally not busy. */ + reg = ahci_px_rreg(ahci, port, AHCI_PX_TFD); + ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_BSY); + ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_DRQ); +} + +void ahci_port_check_d2h_sanity(AHCIQState *ahci, uint8_t port, uint8_t slot) +{ + RegD2HFIS *d2h = g_malloc0(0x20); + uint32_t reg; + + memread(ahci->port[port].fb + 0x40, d2h, 0x20); + g_assert_cmphex(d2h->fis_type, ==, 0x34); + + reg = ahci_px_rreg(ahci, port, AHCI_PX_TFD); + g_assert_cmphex((reg & AHCI_PX_TFD_ERR) >> 8, ==, d2h->error); + g_assert_cmphex((reg & AHCI_PX_TFD_STS), ==, d2h->status); + + g_free(d2h); +} + +void ahci_port_check_pio_sanity(AHCIQState *ahci, uint8_t port, + uint8_t slot, size_t buffsize) +{ + PIOSetupFIS *pio = g_malloc0(0x20); + + /* We cannot check the Status or E_Status registers, becuase + * the status may have again changed between the PIO Setup FIS + * and the conclusion of the command with the D2H Register FIS. */ + memread(ahci->port[port].fb + 0x20, pio, 0x20); + g_assert_cmphex(pio->fis_type, ==, 0x5f); + + /* BUG: PIO Setup FIS as utilized by QEMU tries to fit the entire + * transfer size in a uint16_t field. The maximum transfer size can + * eclipse this; the field is meant to convey the size of data per + * each Data FIS, not the entire operation as a whole. For now, + * we will sanity check the broken case where applicable. */ + if (buffsize <= UINT16_MAX) { + g_assert_cmphex(le16_to_cpu(pio->tx_count), ==, buffsize); + } + + g_free(pio); +} + +void ahci_port_check_cmd_sanity(AHCIQState *ahci, uint8_t port, + uint8_t slot, size_t buffsize) +{ + AHCICommandHeader cmd; + + ahci_get_command_header(ahci, port, slot, &cmd); + g_assert_cmphex(buffsize, ==, cmd.prdbc); +} + +/* Get the command in #slot of port #port. */ +void ahci_get_command_header(AHCIQState *ahci, uint8_t port, + uint8_t slot, AHCICommandHeader *cmd) +{ + uint64_t ba = ahci->port[port].clb; + ba += slot * sizeof(AHCICommandHeader); + memread(ba, cmd, sizeof(AHCICommandHeader)); + + cmd->flags = le16_to_cpu(cmd->flags); + cmd->prdtl = le16_to_cpu(cmd->prdtl); + cmd->prdbc = le32_to_cpu(cmd->prdbc); + cmd->ctba = le64_to_cpu(cmd->ctba); +} + +/* Set the command in #slot of port #port. */ +void ahci_set_command_header(AHCIQState *ahci, uint8_t port, + uint8_t slot, AHCICommandHeader *cmd) +{ + AHCICommandHeader tmp; + uint64_t ba = ahci->port[port].clb; + ba += slot * sizeof(AHCICommandHeader); + + tmp.flags = cpu_to_le16(cmd->flags); + tmp.prdtl = cpu_to_le16(cmd->prdtl); + tmp.prdbc = cpu_to_le32(cmd->prdbc); + tmp.ctba = cpu_to_le64(cmd->ctba); + + memwrite(ba, &tmp, sizeof(AHCICommandHeader)); +} + +void ahci_destroy_command(AHCIQState *ahci, uint8_t port, uint8_t slot) +{ + AHCICommandHeader cmd; + + /* Obtain the Nth Command Header */ + ahci_get_command_header(ahci, port, slot, &cmd); + if (cmd.ctba == 0) { + /* No address in it, so just return -- it's empty. */ + goto tidy; + } + + /* Free the Table */ + ahci_free(ahci, cmd.ctba); + + tidy: + /* NULL the header. */ + memset(&cmd, 0x00, sizeof(cmd)); + ahci_set_command_header(ahci, port, slot, &cmd); + ahci->port[port].ctba[slot] = 0; + ahci->port[port].prdtl[slot] = 0; +} + +void ahci_write_fis(AHCIQState *ahci, RegH2DFIS *fis, uint64_t addr) +{ + RegH2DFIS tmp = *fis; + + /* The auxiliary FIS fields are defined per-command and are not + * currently implemented in libqos/ahci.o, but may or may not need + * to be flipped. */ + + /* All other FIS fields are 8 bit and do not need to be flipped. */ + tmp.count = cpu_to_le16(tmp.count); + + memwrite(addr, &tmp, sizeof(tmp)); +} + +unsigned ahci_pick_cmd(AHCIQState *ahci, uint8_t port) +{ + unsigned i; + unsigned j; + uint32_t reg; + + reg = ahci_px_rreg(ahci, port, AHCI_PX_CI); + + /* Pick the least recently used command slot that's available */ + for (i = 0; i < 32; ++i) { + j = ((ahci->port[port].next + i) % 32); + if (reg & (1 << j)) { + continue; + } + ahci_destroy_command(ahci, port, i); + ahci->port[port].next = (j + 1) % 32; + return j; + } + + g_test_message("All command slots were busy."); + g_assert_not_reached(); +} + +inline unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd) +{ + /* Each PRD can describe up to 4MiB */ + g_assert_cmphex(bytes_per_prd, <=, 4096 * 1024); + g_assert_cmphex(bytes_per_prd & 0x01, ==, 0x00); + return (bytes + bytes_per_prd - 1) / bytes_per_prd; +} + +/* Given a guest buffer address, perform an IO operation */ +void ahci_guest_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd, + uint64_t buffer, size_t bufsize) +{ + AHCICommand *cmd; + + cmd = ahci_command_create(ide_cmd); + ahci_command_set_buffer(cmd, buffer); + ahci_command_set_size(cmd, bufsize); + ahci_command_commit(ahci, cmd, port); + ahci_command_issue(ahci, cmd); + ahci_command_verify(ahci, cmd); + ahci_command_free(cmd); +} + +struct AHCICommand { + /* Test Management Data */ + uint8_t name; + uint8_t port; + uint8_t slot; + uint32_t interrupts; + uint64_t xbytes; + uint32_t prd_size; + uint64_t buffer; + AHCICommandProp *props; + /* Data to be transferred to the guest */ + AHCICommandHeader header; + RegH2DFIS fis; + void *atapi_cmd; +}; + +static AHCICommandProp *ahci_command_find(uint8_t command_name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ahci_command_properties); i++) { + if (ahci_command_properties[i].cmd == command_name) { + return &ahci_command_properties[i]; + } + } + + return NULL; +} + +/* Given a HOST buffer, create a buffer address and perform an IO operation. */ +void ahci_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd, + void *buffer, size_t bufsize) +{ + uint64_t ptr; + AHCICommandProp *props; + + props = ahci_command_find(ide_cmd); + g_assert(props); + ptr = ahci_alloc(ahci, bufsize); + g_assert(ptr); + + if (props->write) { + memwrite(ptr, buffer, bufsize); + } + + ahci_guest_io(ahci, port, ide_cmd, ptr, bufsize); + + if (props->read) { + memread(ptr, buffer, bufsize); + } + + ahci_free(ahci, ptr); +} + +/** + * Initializes a basic command header in memory. + * We assume that this is for an ATA command using RegH2DFIS. + */ +static void command_header_init(AHCICommand *cmd) +{ + AHCICommandHeader *hdr = &cmd->header; + AHCICommandProp *props = cmd->props; + + hdr->flags = 5; /* RegH2DFIS is 5 DW long. Must be < 32 */ + hdr->flags |= CMDH_CLR_BSY; /* Clear the BSY bit when done */ + if (props->write) { + hdr->flags |= CMDH_WRITE; + } + if (props->atapi) { + hdr->flags |= CMDH_ATAPI; + } + /* Other flags: PREFETCH, RESET, and BIST */ + hdr->prdtl = size_to_prdtl(cmd->xbytes, cmd->prd_size); + hdr->prdbc = 0; + hdr->ctba = 0; +} + +static void command_table_init(AHCICommand *cmd) +{ + RegH2DFIS *fis = &(cmd->fis); + + fis->fis_type = REG_H2D_FIS; + fis->flags = REG_H2D_FIS_CMD; /* "Command" bit */ + fis->command = cmd->name; + cmd->fis.feature_low = 0x00; + cmd->fis.feature_high = 0x00; + if (cmd->props->lba28 || cmd->props->lba48) { + cmd->fis.device = ATA_DEVICE_LBA; + } + cmd->fis.count = (cmd->xbytes / AHCI_SECTOR_SIZE); + cmd->fis.icc = 0x00; + cmd->fis.control = 0x00; + memset(cmd->fis.aux, 0x00, ARRAY_SIZE(cmd->fis.aux)); +} + +AHCICommand *ahci_command_create(uint8_t command_name) +{ + AHCICommandProp *props = ahci_command_find(command_name); + AHCICommand *cmd; + + g_assert(props); + cmd = g_malloc0(sizeof(AHCICommand)); + g_assert(!(props->dma && props->pio)); + g_assert(!(props->lba28 && props->lba48)); + g_assert(!(props->read && props->write)); + g_assert(!props->size || props->data); + + /* Defaults and book-keeping */ + cmd->props = props; + cmd->name = command_name; + cmd->xbytes = props->size; + cmd->prd_size = 4096; + cmd->buffer = 0xabad1dea; + + cmd->interrupts = AHCI_PX_IS_DHRS; + /* BUG: We expect the DPS interrupt for data commands */ + /* cmd->interrupts |= props->data ? AHCI_PX_IS_DPS : 0; */ + /* BUG: We expect the DMA Setup interrupt for DMA commands */ + /* cmd->interrupts |= props->dma ? AHCI_PX_IS_DSS : 0; */ + cmd->interrupts |= props->pio ? AHCI_PX_IS_PSS : 0; + + command_header_init(cmd); + command_table_init(cmd); + + return cmd; +} + +void ahci_command_free(AHCICommand *cmd) +{ + g_free(cmd); +} + +void ahci_command_set_buffer(AHCICommand *cmd, uint64_t buffer) +{ + cmd->buffer = buffer; +} + +void ahci_command_set_sizes(AHCICommand *cmd, uint64_t xbytes, + unsigned prd_size) +{ + /* Each PRD can describe up to 4MiB, and must not be odd. */ + g_assert_cmphex(prd_size, <=, 4096 * 1024); + g_assert_cmphex(prd_size & 0x01, ==, 0x00); + cmd->prd_size = prd_size; + cmd->xbytes = xbytes; + cmd->fis.count = (cmd->xbytes / AHCI_SECTOR_SIZE); + cmd->header.prdtl = size_to_prdtl(cmd->xbytes, cmd->prd_size); +} + +void ahci_command_set_size(AHCICommand *cmd, uint64_t xbytes) +{ + ahci_command_set_sizes(cmd, xbytes, cmd->prd_size); +} + +void ahci_command_set_prd_size(AHCICommand *cmd, unsigned prd_size) +{ + ahci_command_set_sizes(cmd, cmd->xbytes, prd_size); +} + +void ahci_command_commit(AHCIQState *ahci, AHCICommand *cmd, uint8_t port) +{ + uint16_t i, prdtl; + uint64_t table_size, table_ptr, remaining; + PRD prd; + + /* This command is now tied to this port/command slot */ + cmd->port = port; + cmd->slot = ahci_pick_cmd(ahci, port); + + /* Create a buffer for the command table */ + prdtl = size_to_prdtl(cmd->xbytes, cmd->prd_size); + table_size = CMD_TBL_SIZ(prdtl); + table_ptr = ahci_alloc(ahci, table_size); + g_assert(table_ptr); + /* AHCI 1.3: Must be aligned to 0x80 */ + g_assert((table_ptr & 0x7F) == 0x00); + cmd->header.ctba = table_ptr; + + /* Commit the command header and command FIS */ + ahci_set_command_header(ahci, port, cmd->slot, &(cmd->header)); + ahci_write_fis(ahci, &(cmd->fis), table_ptr); + + /* Construct and write the PRDs to the command table */ + g_assert_cmphex(prdtl, ==, cmd->header.prdtl); + remaining = cmd->xbytes; + for (i = 0; i < prdtl; ++i) { + prd.dba = cpu_to_le64(cmd->buffer + (cmd->prd_size * i)); + prd.res = 0; + if (remaining > cmd->prd_size) { + /* Note that byte count is 0-based. */ + prd.dbc = cpu_to_le32(cmd->prd_size - 1); + remaining -= cmd->prd_size; + } else { + /* Again, dbc is 0-based. */ + prd.dbc = cpu_to_le32(remaining - 1); + remaining = 0; + } + prd.dbc |= cpu_to_le32(0x80000000); /* Request DPS Interrupt */ + + /* Commit the PRD entry to the Command Table */ + memwrite(table_ptr + 0x80 + (i * sizeof(PRD)), + &prd, sizeof(PRD)); + } + + /* Bookmark the PRDTL and CTBA values */ + ahci->port[port].ctba[cmd->slot] = table_ptr; + ahci->port[port].prdtl[cmd->slot] = prdtl; +} + +void ahci_command_issue_async(AHCIQState *ahci, AHCICommand *cmd) +{ + if (cmd->props->ncq) { + ahci_px_wreg(ahci, cmd->port, AHCI_PX_SACT, (1 << cmd->slot)); + } + + ahci_px_wreg(ahci, cmd->port, AHCI_PX_CI, (1 << cmd->slot)); +} + +void ahci_command_wait(AHCIQState *ahci, AHCICommand *cmd) +{ + /* We can't rely on STS_BSY until the command has started processing. + * Therefore, we also use the Command Issue bit as indication of + * a command in-flight. */ + while (BITSET(ahci_px_rreg(ahci, cmd->port, AHCI_PX_TFD), + AHCI_PX_TFD_STS_BSY) || + BITSET(ahci_px_rreg(ahci, cmd->port, AHCI_PX_CI), (1 << cmd->slot))) { + usleep(50); + } +} + +void ahci_command_issue(AHCIQState *ahci, AHCICommand *cmd) +{ + ahci_command_issue_async(ahci, cmd); + ahci_command_wait(ahci, cmd); +} + +void ahci_command_verify(AHCIQState *ahci, AHCICommand *cmd) +{ + uint8_t slot = cmd->slot; + uint8_t port = cmd->port; + + ahci_port_check_error(ahci, port); + ahci_port_check_interrupts(ahci, port, cmd->interrupts); + ahci_port_check_nonbusy(ahci, port, slot); + ahci_port_check_cmd_sanity(ahci, port, slot, cmd->xbytes); + ahci_port_check_d2h_sanity(ahci, port, slot); + if (cmd->props->pio) { + ahci_port_check_pio_sanity(ahci, port, slot, cmd->xbytes); + } +} + +uint8_t ahci_command_slot(AHCICommand *cmd) +{ + return cmd->slot; +} diff --git a/tests/libqos/ahci.h b/tests/libqos/ahci.h new file mode 100644 index 0000000000..39b99d3658 --- /dev/null +++ b/tests/libqos/ahci.h @@ -0,0 +1,549 @@ +#ifndef __libqos_ahci_h +#define __libqos_ahci_h + +/* + * AHCI qtest library functions and definitions + * + * Copyright (c) 2014 John Snow <jsnow@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdbool.h> +#include "libqos/libqos.h" +#include "libqos/pci.h" +#include "libqos/malloc-pc.h" + +/*** Supplementary PCI Config Space IDs & Masks ***/ +#define PCI_DEVICE_ID_INTEL_Q35_AHCI (0x2922) +#define PCI_MSI_FLAGS_RESERVED (0xFF00) +#define PCI_PM_CTRL_RESERVED (0xFC) +#define PCI_BCC(REG32) ((REG32) >> 24) +#define PCI_PI(REG32) (((REG32) >> 8) & 0xFF) +#define PCI_SCC(REG32) (((REG32) >> 16) & 0xFF) + +/*** Recognized AHCI Device Types ***/ +#define AHCI_INTEL_ICH9 (PCI_DEVICE_ID_INTEL_Q35_AHCI << 16 | \ + PCI_VENDOR_ID_INTEL) + +/*** AHCI/HBA Register Offsets and Bitmasks ***/ +#define AHCI_CAP (0) +#define AHCI_CAP_NP (0x1F) +#define AHCI_CAP_SXS (0x20) +#define AHCI_CAP_EMS (0x40) +#define AHCI_CAP_CCCS (0x80) +#define AHCI_CAP_NCS (0x1F00) +#define AHCI_CAP_PSC (0x2000) +#define AHCI_CAP_SSC (0x4000) +#define AHCI_CAP_PMD (0x8000) +#define AHCI_CAP_FBSS (0x10000) +#define AHCI_CAP_SPM (0x20000) +#define AHCI_CAP_SAM (0x40000) +#define AHCI_CAP_RESERVED (0x80000) +#define AHCI_CAP_ISS (0xF00000) +#define AHCI_CAP_SCLO (0x1000000) +#define AHCI_CAP_SAL (0x2000000) +#define AHCI_CAP_SALP (0x4000000) +#define AHCI_CAP_SSS (0x8000000) +#define AHCI_CAP_SMPS (0x10000000) +#define AHCI_CAP_SSNTF (0x20000000) +#define AHCI_CAP_SNCQ (0x40000000) +#define AHCI_CAP_S64A (0x80000000) + +#define AHCI_GHC (1) +#define AHCI_GHC_HR (0x01) +#define AHCI_GHC_IE (0x02) +#define AHCI_GHC_MRSM (0x04) +#define AHCI_GHC_RESERVED (0x7FFFFFF8) +#define AHCI_GHC_AE (0x80000000) + +#define AHCI_IS (2) +#define AHCI_PI (3) +#define AHCI_VS (4) + +#define AHCI_CCCCTL (5) +#define AHCI_CCCCTL_EN (0x01) +#define AHCI_CCCCTL_RESERVED (0x06) +#define AHCI_CCCCTL_CC (0xFF00) +#define AHCI_CCCCTL_TV (0xFFFF0000) + +#define AHCI_CCCPORTS (6) +#define AHCI_EMLOC (7) + +#define AHCI_EMCTL (8) +#define AHCI_EMCTL_STSMR (0x01) +#define AHCI_EMCTL_CTLTM (0x100) +#define AHCI_EMCTL_CTLRST (0x200) +#define AHCI_EMCTL_RESERVED (0xF0F0FCFE) + +#define AHCI_CAP2 (9) +#define AHCI_CAP2_BOH (0x01) +#define AHCI_CAP2_NVMP (0x02) +#define AHCI_CAP2_APST (0x04) +#define AHCI_CAP2_RESERVED (0xFFFFFFF8) + +#define AHCI_BOHC (10) +#define AHCI_RESERVED (11) +#define AHCI_NVMHCI (24) +#define AHCI_VENDOR (40) +#define AHCI_PORTS (64) + +/*** Port Memory Offsets & Bitmasks ***/ +#define AHCI_PX_CLB (0) +#define AHCI_PX_CLB_RESERVED (0x1FF) + +#define AHCI_PX_CLBU (1) + +#define AHCI_PX_FB (2) +#define AHCI_PX_FB_RESERVED (0xFF) + +#define AHCI_PX_FBU (3) + +#define AHCI_PX_IS (4) +#define AHCI_PX_IS_DHRS (0x1) +#define AHCI_PX_IS_PSS (0x2) +#define AHCI_PX_IS_DSS (0x4) +#define AHCI_PX_IS_SDBS (0x8) +#define AHCI_PX_IS_UFS (0x10) +#define AHCI_PX_IS_DPS (0x20) +#define AHCI_PX_IS_PCS (0x40) +#define AHCI_PX_IS_DMPS (0x80) +#define AHCI_PX_IS_RESERVED (0x23FFF00) +#define AHCI_PX_IS_PRCS (0x400000) +#define AHCI_PX_IS_IPMS (0x800000) +#define AHCI_PX_IS_OFS (0x1000000) +#define AHCI_PX_IS_INFS (0x4000000) +#define AHCI_PX_IS_IFS (0x8000000) +#define AHCI_PX_IS_HBDS (0x10000000) +#define AHCI_PX_IS_HBFS (0x20000000) +#define AHCI_PX_IS_TFES (0x40000000) +#define AHCI_PX_IS_CPDS (0x80000000) + +#define AHCI_PX_IE (5) +#define AHCI_PX_IE_DHRE (0x1) +#define AHCI_PX_IE_PSE (0x2) +#define AHCI_PX_IE_DSE (0x4) +#define AHCI_PX_IE_SDBE (0x8) +#define AHCI_PX_IE_UFE (0x10) +#define AHCI_PX_IE_DPE (0x20) +#define AHCI_PX_IE_PCE (0x40) +#define AHCI_PX_IE_DMPE (0x80) +#define AHCI_PX_IE_RESERVED (0x23FFF00) +#define AHCI_PX_IE_PRCE (0x400000) +#define AHCI_PX_IE_IPME (0x800000) +#define AHCI_PX_IE_OFE (0x1000000) +#define AHCI_PX_IE_INFE (0x4000000) +#define AHCI_PX_IE_IFE (0x8000000) +#define AHCI_PX_IE_HBDE (0x10000000) +#define AHCI_PX_IE_HBFE (0x20000000) +#define AHCI_PX_IE_TFEE (0x40000000) +#define AHCI_PX_IE_CPDE (0x80000000) + +#define AHCI_PX_CMD (6) +#define AHCI_PX_CMD_ST (0x1) +#define AHCI_PX_CMD_SUD (0x2) +#define AHCI_PX_CMD_POD (0x4) +#define AHCI_PX_CMD_CLO (0x8) +#define AHCI_PX_CMD_FRE (0x10) +#define AHCI_PX_CMD_RESERVED (0xE0) +#define AHCI_PX_CMD_CCS (0x1F00) +#define AHCI_PX_CMD_MPSS (0x2000) +#define AHCI_PX_CMD_FR (0x4000) +#define AHCI_PX_CMD_CR (0x8000) +#define AHCI_PX_CMD_CPS (0x10000) +#define AHCI_PX_CMD_PMA (0x20000) +#define AHCI_PX_CMD_HPCP (0x40000) +#define AHCI_PX_CMD_MPSP (0x80000) +#define AHCI_PX_CMD_CPD (0x100000) +#define AHCI_PX_CMD_ESP (0x200000) +#define AHCI_PX_CMD_FBSCP (0x400000) +#define AHCI_PX_CMD_APSTE (0x800000) +#define AHCI_PX_CMD_ATAPI (0x1000000) +#define AHCI_PX_CMD_DLAE (0x2000000) +#define AHCI_PX_CMD_ALPE (0x4000000) +#define AHCI_PX_CMD_ASP (0x8000000) +#define AHCI_PX_CMD_ICC (0xF0000000) + +#define AHCI_PX_RES1 (7) + +#define AHCI_PX_TFD (8) +#define AHCI_PX_TFD_STS (0xFF) +#define AHCI_PX_TFD_STS_ERR (0x01) +#define AHCI_PX_TFD_STS_CS1 (0x06) +#define AHCI_PX_TFD_STS_DRQ (0x08) +#define AHCI_PX_TFD_STS_CS2 (0x70) +#define AHCI_PX_TFD_STS_BSY (0x80) +#define AHCI_PX_TFD_ERR (0xFF00) +#define AHCI_PX_TFD_RESERVED (0xFFFF0000) + +#define AHCI_PX_SIG (9) +#define AHCI_PX_SIG_SECTOR_COUNT (0xFF) +#define AHCI_PX_SIG_LBA_LOW (0xFF00) +#define AHCI_PX_SIG_LBA_MID (0xFF0000) +#define AHCI_PX_SIG_LBA_HIGH (0xFF000000) + +#define AHCI_PX_SSTS (10) +#define AHCI_PX_SSTS_DET (0x0F) +#define AHCI_PX_SSTS_SPD (0xF0) +#define AHCI_PX_SSTS_IPM (0xF00) +#define AHCI_PX_SSTS_RESERVED (0xFFFFF000) +#define SSTS_DET_NO_DEVICE (0x00) +#define SSTS_DET_PRESENT (0x01) +#define SSTS_DET_ESTABLISHED (0x03) +#define SSTS_DET_OFFLINE (0x04) + +#define AHCI_PX_SCTL (11) + +#define AHCI_PX_SERR (12) +#define AHCI_PX_SERR_ERR (0xFFFF) +#define AHCI_PX_SERR_DIAG (0xFFFF0000) +#define AHCI_PX_SERR_DIAG_X (0x04000000) + +#define AHCI_PX_SACT (13) +#define AHCI_PX_CI (14) +#define AHCI_PX_SNTF (15) + +#define AHCI_PX_FBS (16) +#define AHCI_PX_FBS_EN (0x1) +#define AHCI_PX_FBS_DEC (0x2) +#define AHCI_PX_FBS_SDE (0x4) +#define AHCI_PX_FBS_DEV (0xF00) +#define AHCI_PX_FBS_ADO (0xF000) +#define AHCI_PX_FBS_DWE (0xF0000) +#define AHCI_PX_FBS_RESERVED (0xFFF000F8) + +#define AHCI_PX_RES2 (17) +#define AHCI_PX_VS (28) + +#define HBA_DATA_REGION_SIZE (256) +#define HBA_PORT_DATA_SIZE (128) +#define HBA_PORT_NUM_REG (HBA_PORT_DATA_SIZE/4) + +#define AHCI_VERSION_0_95 (0x00000905) +#define AHCI_VERSION_1_0 (0x00010000) +#define AHCI_VERSION_1_1 (0x00010100) +#define AHCI_VERSION_1_2 (0x00010200) +#define AHCI_VERSION_1_3 (0x00010300) + +#define AHCI_SECTOR_SIZE (512) + +/* FIS types */ +enum { + REG_H2D_FIS = 0x27, + REG_D2H_FIS = 0x34, + DMA_ACTIVATE_FIS = 0x39, + DMA_SETUP_FIS = 0x41, + DATA_FIS = 0x46, + BIST_ACTIVATE_FIS = 0x58, + PIO_SETUP_FIS = 0x5F, + SDB_FIS = 0xA1 +}; + +/* FIS flags */ +#define REG_H2D_FIS_CMD 0x80 + +/* ATA Commands */ +enum { + /* DMA */ + CMD_READ_DMA = 0xC8, + CMD_READ_DMA_EXT = 0x25, + CMD_WRITE_DMA = 0xCA, + CMD_WRITE_DMA_EXT = 0x35, + /* PIO */ + CMD_READ_PIO = 0x20, + CMD_READ_PIO_EXT = 0x24, + CMD_WRITE_PIO = 0x30, + CMD_WRITE_PIO_EXT = 0x34, + /* Misc */ + CMD_READ_MAX = 0xF8, + CMD_READ_MAX_EXT = 0x27, + CMD_FLUSH_CACHE = 0xE7, + CMD_IDENTIFY = 0xEC +}; + +/* AHCI Command Header Flags & Masks*/ +#define CMDH_CFL (0x1F) +#define CMDH_ATAPI (0x20) +#define CMDH_WRITE (0x40) +#define CMDH_PREFETCH (0x80) +#define CMDH_RESET (0x100) +#define CMDH_BIST (0x200) +#define CMDH_CLR_BSY (0x400) +#define CMDH_RES (0x800) +#define CMDH_PMP (0xF000) + +/* ATA device register masks */ +#define ATA_DEVICE_MAGIC 0xA0 +#define ATA_DEVICE_LBA 0x40 +#define ATA_DEVICE_DRIVE 0x10 +#define ATA_DEVICE_HEAD 0x0F + +/*** Structures ***/ + +typedef struct AHCIPortQState { + uint64_t fb; + uint64_t clb; + uint64_t ctba[32]; + uint16_t prdtl[32]; + uint8_t next; /** Next Command Slot to Use **/ +} AHCIPortQState; + +typedef struct AHCIQState { + QOSState *parent; + QPCIDevice *dev; + void *hba_base; + uint64_t barsize; + uint32_t fingerprint; + uint32_t cap; + uint32_t cap2; + AHCIPortQState port[32]; +} AHCIQState; + +/** + * Generic FIS structure. + */ +typedef struct FIS { + uint8_t fis_type; + uint8_t flags; + char data[0]; +} __attribute__((__packed__)) FIS; + +/** + * Register device-to-host FIS structure. + */ +typedef struct RegD2HFIS { + /* DW0 */ + uint8_t fis_type; + uint8_t flags; + uint8_t status; + uint8_t error; + /* DW1 */ + uint8_t lba_lo[3]; + uint8_t device; + /* DW2 */ + uint8_t lba_hi[3]; + uint8_t res0; + /* DW3 */ + uint16_t count; + uint16_t res1; + /* DW4 */ + uint32_t res2; +} __attribute__((__packed__)) RegD2HFIS; + +/** + * Register device-to-host FIS structure; + * PIO Setup variety. + */ +typedef struct PIOSetupFIS { + /* DW0 */ + uint8_t fis_type; + uint8_t flags; + uint8_t status; + uint8_t error; + /* DW1 */ + uint8_t lba_lo[3]; + uint8_t device; + /* DW2 */ + uint8_t lba_hi[3]; + uint8_t res0; + /* DW3 */ + uint16_t count; + uint8_t res1; + uint8_t e_status; + /* DW4 */ + uint16_t tx_count; + uint16_t res2; +} __attribute__((__packed__)) PIOSetupFIS; + +/** + * Register host-to-device FIS structure. + */ +typedef struct RegH2DFIS { + /* DW0 */ + uint8_t fis_type; + uint8_t flags; + uint8_t command; + uint8_t feature_low; + /* DW1 */ + uint8_t lba_lo[3]; + uint8_t device; + /* DW2 */ + uint8_t lba_hi[3]; + uint8_t feature_high; + /* DW3 */ + uint16_t count; + uint8_t icc; + uint8_t control; + /* DW4 */ + uint8_t aux[4]; +} __attribute__((__packed__)) RegH2DFIS; + +/** + * Command List entry structure. + * The command list contains between 1-32 of these structures. + */ +typedef struct AHCICommandHeader { + uint16_t flags; /* Cmd-Fis-Len, PMP#, and flags. */ + uint16_t prdtl; /* Phys Region Desc. Table Length */ + uint32_t prdbc; /* Phys Region Desc. Byte Count */ + uint64_t ctba; /* Command Table Descriptor Base Address */ + uint32_t res[4]; +} __attribute__((__packed__)) AHCICommandHeader; + +/** + * Physical Region Descriptor; pointed to by the Command List Header, + * struct ahci_command. + */ +typedef struct PRD { + uint64_t dba; /* Data Base Address */ + uint32_t res; /* Reserved */ + uint32_t dbc; /* Data Byte Count (0-indexed) & Interrupt Flag (bit 2^31) */ +} __attribute__((__packed__)) PRD; + +/* Opaque, defined within ahci.c */ +typedef struct AHCICommand AHCICommand; + +/*** Macro Utilities ***/ +#define BITANY(data, mask) (((data) & (mask)) != 0) +#define BITSET(data, mask) (((data) & (mask)) == (mask)) +#define BITCLR(data, mask) (((data) & (mask)) == 0) +#define ASSERT_BIT_SET(data, mask) g_assert_cmphex((data) & (mask), ==, (mask)) +#define ASSERT_BIT_CLEAR(data, mask) g_assert_cmphex((data) & (mask), ==, 0) + +/* For calculating how big the PRD table needs to be: */ +#define CMD_TBL_SIZ(n) ((0x80 + ((n) * sizeof(PRD)) + 0x7F) & ~0x7F) + +/* Helpers for reading/writing AHCI HBA register values */ + +static inline uint32_t ahci_mread(AHCIQState *ahci, size_t offset) +{ + return qpci_io_readl(ahci->dev, ahci->hba_base + offset); +} + +static inline void ahci_mwrite(AHCIQState *ahci, size_t offset, uint32_t value) +{ + qpci_io_writel(ahci->dev, ahci->hba_base + offset, value); +} + +static inline uint32_t ahci_rreg(AHCIQState *ahci, uint32_t reg_num) +{ + return ahci_mread(ahci, 4 * reg_num); +} + +static inline void ahci_wreg(AHCIQState *ahci, uint32_t reg_num, uint32_t value) +{ + ahci_mwrite(ahci, 4 * reg_num, value); +} + +static inline void ahci_set(AHCIQState *ahci, uint32_t reg_num, uint32_t mask) +{ + ahci_wreg(ahci, reg_num, ahci_rreg(ahci, reg_num) | mask); +} + +static inline void ahci_clr(AHCIQState *ahci, uint32_t reg_num, uint32_t mask) +{ + ahci_wreg(ahci, reg_num, ahci_rreg(ahci, reg_num) & ~mask); +} + +static inline size_t ahci_px_offset(uint8_t port, uint32_t reg_num) +{ + return AHCI_PORTS + (HBA_PORT_NUM_REG * port) + reg_num; +} + +static inline uint32_t ahci_px_rreg(AHCIQState *ahci, uint8_t port, + uint32_t reg_num) +{ + return ahci_rreg(ahci, ahci_px_offset(port, reg_num)); +} + +static inline void ahci_px_wreg(AHCIQState *ahci, uint8_t port, + uint32_t reg_num, uint32_t value) +{ + ahci_wreg(ahci, ahci_px_offset(port, reg_num), value); +} + +static inline void ahci_px_set(AHCIQState *ahci, uint8_t port, + uint32_t reg_num, uint32_t mask) +{ + ahci_px_wreg(ahci, port, reg_num, + ahci_px_rreg(ahci, port, reg_num) | mask); +} + +static inline void ahci_px_clr(AHCIQState *ahci, uint8_t port, + uint32_t reg_num, uint32_t mask) +{ + ahci_px_wreg(ahci, port, reg_num, + ahci_px_rreg(ahci, port, reg_num) & ~mask); +} + +/*** Prototypes ***/ +uint64_t ahci_alloc(AHCIQState *ahci, size_t bytes); +void ahci_free(AHCIQState *ahci, uint64_t addr); +QPCIDevice *get_ahci_device(uint32_t *fingerprint); +void free_ahci_device(QPCIDevice *dev); +void ahci_clean_mem(AHCIQState *ahci); +void ahci_pci_enable(AHCIQState *ahci); +void start_ahci_device(AHCIQState *ahci); +void ahci_hba_enable(AHCIQState *ahci); +unsigned ahci_port_select(AHCIQState *ahci); +void ahci_port_clear(AHCIQState *ahci, uint8_t port); +void ahci_port_check_error(AHCIQState *ahci, uint8_t port); +void ahci_port_check_interrupts(AHCIQState *ahci, uint8_t port, + uint32_t intr_mask); +void ahci_port_check_nonbusy(AHCIQState *ahci, uint8_t port, uint8_t slot); +void ahci_port_check_d2h_sanity(AHCIQState *ahci, uint8_t port, uint8_t slot); +void ahci_port_check_pio_sanity(AHCIQState *ahci, uint8_t port, + uint8_t slot, size_t buffsize); +void ahci_port_check_cmd_sanity(AHCIQState *ahci, uint8_t port, + uint8_t slot, size_t buffsize); +void ahci_get_command_header(AHCIQState *ahci, uint8_t port, + uint8_t slot, AHCICommandHeader *cmd); +void ahci_set_command_header(AHCIQState *ahci, uint8_t port, + uint8_t slot, AHCICommandHeader *cmd); +void ahci_destroy_command(AHCIQState *ahci, uint8_t port, uint8_t slot); +void ahci_write_fis(AHCIQState *ahci, RegH2DFIS *fis, uint64_t addr); +unsigned ahci_pick_cmd(AHCIQState *ahci, uint8_t port); +unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd); +void ahci_guest_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd, + uint64_t gbuffer, size_t size); +void ahci_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd, + void *buffer, size_t bufsize); + +/* Command Lifecycle */ +AHCICommand *ahci_command_create(uint8_t command_name); +void ahci_command_commit(AHCIQState *ahci, AHCICommand *cmd, uint8_t port); +void ahci_command_issue(AHCIQState *ahci, AHCICommand *cmd); +void ahci_command_issue_async(AHCIQState *ahci, AHCICommand *cmd); +void ahci_command_wait(AHCIQState *ahci, AHCICommand *cmd); +void ahci_command_verify(AHCIQState *ahci, AHCICommand *cmd); +void ahci_command_free(AHCICommand *cmd); + +/* Command adjustments */ +void ahci_command_set_buffer(AHCICommand *cmd, uint64_t buffer); +void ahci_command_set_size(AHCICommand *cmd, uint64_t xbytes); +void ahci_command_set_prd_size(AHCICommand *cmd, unsigned prd_size); +void ahci_command_set_sizes(AHCICommand *cmd, uint64_t xbytes, + unsigned prd_size); + +/* Command Misc */ +uint8_t ahci_command_slot(AHCICommand *cmd); + +#endif diff --git a/tests/libqos/libqos-pc.c b/tests/libqos/libqos-pc.c new file mode 100644 index 0000000000..bbace893fb --- /dev/null +++ b/tests/libqos/libqos-pc.c @@ -0,0 +1,24 @@ +#include "libqos/libqos-pc.h" +#include "libqos/malloc-pc.h" + +static QOSOps qos_ops = { + .init_allocator = pc_alloc_init_flags, + .uninit_allocator = pc_alloc_uninit +}; + +QOSState *qtest_pc_boot(const char *cmdline_fmt, ...) +{ + QOSState *qs; + va_list ap; + + va_start(ap, cmdline_fmt); + qs = qtest_vboot(&qos_ops, cmdline_fmt, ap); + va_end(ap); + + return qs; +} + +void qtest_pc_shutdown(QOSState *qs) +{ + return qtest_shutdown(qs); +} diff --git a/tests/libqos/libqos-pc.h b/tests/libqos/libqos-pc.h new file mode 100644 index 0000000000..316857d32f --- /dev/null +++ b/tests/libqos/libqos-pc.h @@ -0,0 +1,9 @@ +#ifndef __libqos_pc_h +#define __libqos_pc_h + +#include "libqos/libqos.h" + +QOSState *qtest_pc_boot(const char *cmdline_fmt, ...); +void qtest_pc_shutdown(QOSState *qs); + +#endif diff --git a/tests/libqos/libqos.c b/tests/libqos/libqos.c new file mode 100644 index 0000000000..bc8beb281f --- /dev/null +++ b/tests/libqos/libqos.c @@ -0,0 +1,63 @@ +#include <stdio.h> +#include <stdlib.h> +#include <glib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/wait.h> + +#include "libqtest.h" +#include "libqos/libqos.h" +#include "libqos/pci.h" + +/*** Test Setup & Teardown ***/ + +/** + * Launch QEMU with the given command line, + * and then set up interrupts and our guest malloc interface. + */ +QOSState *qtest_vboot(QOSOps *ops, const char *cmdline_fmt, va_list ap) +{ + char *cmdline; + + struct QOSState *qs = g_malloc(sizeof(QOSState)); + + cmdline = g_strdup_vprintf(cmdline_fmt, ap); + qs->qts = qtest_start(cmdline); + qs->ops = ops; + qtest_irq_intercept_in(global_qtest, "ioapic"); + if (ops && ops->init_allocator) { + qs->alloc = ops->init_allocator(ALLOC_NO_FLAGS); + } + + g_free(cmdline); + return qs; +} + +/** + * Launch QEMU with the given command line, + * and then set up interrupts and our guest malloc interface. + */ +QOSState *qtest_boot(QOSOps *ops, const char *cmdline_fmt, ...) +{ + QOSState *qs; + va_list ap; + + va_start(ap, cmdline_fmt); + qs = qtest_vboot(ops, cmdline_fmt, ap); + va_end(ap); + + return qs; +} + +/** + * Tear down the QEMU instance. + */ +void qtest_shutdown(QOSState *qs) +{ + if (qs->alloc && qs->ops && qs->ops->uninit_allocator) { + qs->ops->uninit_allocator(qs->alloc); + qs->alloc = NULL; + } + qtest_quit(qs->qts); + g_free(qs); +} diff --git a/tests/libqos/libqos.h b/tests/libqos/libqos.h new file mode 100644 index 0000000000..612d41e5e9 --- /dev/null +++ b/tests/libqos/libqos.h @@ -0,0 +1,33 @@ +#ifndef __libqos_h +#define __libqos_h + +#include "libqtest.h" +#include "libqos/pci.h" +#include "libqos/malloc-pc.h" + +typedef struct QOSOps { + QGuestAllocator *(*init_allocator)(QAllocOpts); + void (*uninit_allocator)(QGuestAllocator *); +} QOSOps; + +typedef struct QOSState { + QTestState *qts; + QGuestAllocator *alloc; + QOSOps *ops; +} QOSState; + +QOSState *qtest_vboot(QOSOps *ops, const char *cmdline_fmt, va_list ap); +QOSState *qtest_boot(QOSOps *ops, const char *cmdline_fmt, ...); +void qtest_shutdown(QOSState *qs); + +static inline uint64_t qmalloc(QOSState *q, size_t bytes) +{ + return guest_alloc(q->alloc, bytes); +} + +static inline void qfree(QOSState *q, uint64_t addr) +{ + guest_free(q->alloc, addr); +} + +#endif diff --git a/tests/libqos/malloc-pc.c b/tests/libqos/malloc-pc.c index c9c48fddc9..6e253b6877 100644 --- a/tests/libqos/malloc-pc.c +++ b/tests/libqos/malloc-pc.c @@ -32,31 +32,17 @@ void pc_alloc_uninit(QGuestAllocator *allocator) QGuestAllocator *pc_alloc_init_flags(QAllocOpts flags) { - QGuestAllocator *s = g_malloc0(sizeof(*s)); + QGuestAllocator *s; uint64_t ram_size; QFWCFG *fw_cfg = pc_fw_cfg_init(); - MemBlock *node; - - s->opts = flags; - s->page_size = PAGE_SIZE; ram_size = qfw_cfg_get_u64(fw_cfg, FW_CFG_RAM_SIZE); - - /* Start at 1MB */ - s->start = 1 << 20; - - /* Respect PCI hole */ - s->end = MIN(ram_size, 0xE0000000); + s = alloc_init_flags(flags, 1 << 20, MIN(ram_size, 0xE0000000)); + alloc_set_page_size(s, PAGE_SIZE); /* clean-up */ g_free(fw_cfg); - QTAILQ_INIT(&s->used); - QTAILQ_INIT(&s->free); - - node = mlist_new(s->start, s->end - s->start); - QTAILQ_INSERT_HEAD(&s->free, node, MLIST_ENTNAME); - return s; } diff --git a/tests/libqos/malloc.c b/tests/libqos/malloc.c index 5debf18497..67f31902fd 100644 --- a/tests/libqos/malloc.c +++ b/tests/libqos/malloc.c @@ -16,6 +16,26 @@ #include <inttypes.h> #include <glib.h> +typedef QTAILQ_HEAD(MemList, MemBlock) MemList; + +typedef struct MemBlock { + QTAILQ_ENTRY(MemBlock) MLIST_ENTNAME; + uint64_t size; + uint64_t addr; +} MemBlock; + +struct QGuestAllocator { + QAllocOpts opts; + uint64_t start; + uint64_t end; + uint32_t page_size; + + MemList used; + MemList free; +}; + +#define DEFAULT_PAGE_SIZE 4096 + static void mlist_delete(MemList *list, MemBlock *node) { g_assert(list && node); @@ -103,6 +123,21 @@ static void mlist_coalesce(MemList *head, MemBlock *node) } while (merge); } +static MemBlock *mlist_new(uint64_t addr, uint64_t size) +{ + MemBlock *block; + + if (!size) { + return NULL; + } + block = g_malloc0(sizeof(MemBlock)); + + block->addr = addr; + block->size = size; + + return block; +} + static uint64_t mlist_fulfill(QGuestAllocator *s, MemBlock *freenode, uint64_t size) { @@ -187,21 +222,6 @@ static void mlist_free(QGuestAllocator *s, uint64_t addr) mlist_coalesce(&s->free, node); } -MemBlock *mlist_new(uint64_t addr, uint64_t size) -{ - MemBlock *block; - - if (!size) { - return NULL; - } - block = g_malloc0(sizeof(MemBlock)); - - block->addr = addr; - block->size = size; - - return block; -} - /* * Mostly for valgrind happiness, but it does offer * a chokepoint for debugging guest memory leaks, too. @@ -268,3 +288,44 @@ void guest_free(QGuestAllocator *allocator, uint64_t addr) mlist_check(allocator); } } + +QGuestAllocator *alloc_init(uint64_t start, uint64_t end) +{ + QGuestAllocator *s = g_malloc0(sizeof(*s)); + MemBlock *node; + + s->start = start; + s->end = end; + + QTAILQ_INIT(&s->used); + QTAILQ_INIT(&s->free); + + node = mlist_new(s->start, s->end - s->start); + QTAILQ_INSERT_HEAD(&s->free, node, MLIST_ENTNAME); + + s->page_size = DEFAULT_PAGE_SIZE; + + return s; +} + +QGuestAllocator *alloc_init_flags(QAllocOpts opts, + uint64_t start, uint64_t end) +{ + QGuestAllocator *s = alloc_init(start, end); + s->opts = opts; + return s; +} + +void alloc_set_page_size(QGuestAllocator *allocator, size_t page_size) +{ + /* Can't alter the page_size for an allocator in-use */ + g_assert(QTAILQ_EMPTY(&allocator->used)); + + g_assert(is_power_of_2(page_size)); + allocator->page_size = page_size; +} + +void alloc_set_flags(QGuestAllocator *allocator, QAllocOpts opts) +{ + allocator->opts |= opts; +} diff --git a/tests/libqos/malloc.h b/tests/libqos/malloc.h index 465efeb8fb..71ac407dcd 100644 --- a/tests/libqos/malloc.h +++ b/tests/libqos/malloc.h @@ -17,8 +17,6 @@ #include <sys/types.h> #include "qemu/queue.h" -#define MLIST_ENTNAME entries - typedef enum { ALLOC_NO_FLAGS = 0x00, ALLOC_LEAK_WARN = 0x01, @@ -26,28 +24,18 @@ typedef enum { ALLOC_PARANOID = 0x04 } QAllocOpts; -typedef QTAILQ_HEAD(MemList, MemBlock) MemList; -typedef struct MemBlock { - QTAILQ_ENTRY(MemBlock) MLIST_ENTNAME; - uint64_t size; - uint64_t addr; -} MemBlock; - -typedef struct QGuestAllocator { - QAllocOpts opts; - uint64_t start; - uint64_t end; - uint32_t page_size; +typedef struct QGuestAllocator QGuestAllocator; - MemList used; - MemList free; -} QGuestAllocator; - -MemBlock *mlist_new(uint64_t addr, uint64_t size); void alloc_uninit(QGuestAllocator *allocator); /* Always returns page aligned values */ uint64_t guest_alloc(QGuestAllocator *allocator, size_t size); void guest_free(QGuestAllocator *allocator, uint64_t addr); +QGuestAllocator *alloc_init(uint64_t start, uint64_t end); +QGuestAllocator *alloc_init_flags(QAllocOpts flags, + uint64_t start, uint64_t end); +void alloc_set_page_size(QGuestAllocator *allocator, size_t page_size); +void alloc_set_flags(QGuestAllocator *allocator, QAllocOpts opts); + #endif diff --git a/tests/qemu-iotests/016.out b/tests/qemu-iotests/016.out deleted file mode 100644 index acbd60b4a3..0000000000 --- a/tests/qemu-iotests/016.out +++ /dev/null @@ -1,23 +0,0 @@ -QA output created by 016 -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 - -== reading at EOF == -read 512/512 bytes at offset 134217728 -512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - -== reading far past EOF == -read 512/512 bytes at offset 268435456 -512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - -== writing at EOF == -wrote 512/512 bytes at offset 134217728 -512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 512/512 bytes at offset 134217728 -512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - -== writing far past EOF == -wrote 512/512 bytes at offset 268435456 -512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 512/512 bytes at offset 268435456 -512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -*** done diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index 11c858f27d..27138a2299 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -93,6 +93,7 @@ echo run_qemu -drive file="$TEST_IMG",format=foo run_qemu -drive file="$TEST_IMG",driver=foo run_qemu -drive file="$TEST_IMG",driver=raw,format=qcow2 +run_qemu -drive file="$TEST_IMG",driver=qcow2,format=qcow2 echo echo === Overriding backing file === diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index f497c5717b..bf52bf02d4 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -5,43 +5,46 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR === Unknown option === Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt= -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=: could not open disk image TEST_DIR/t.qcow2: Block format 'qcow2' used by device 'ide0-hd0' doesn't support the option 'unknown_opt' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=: Block format 'qcow2' used by device 'ide0-hd0' doesn't support the option 'unknown_opt' Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=on -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=on: could not open disk image TEST_DIR/t.qcow2: Block format 'qcow2' used by device 'ide0-hd0' doesn't support the option 'unknown_opt' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=on: Block format 'qcow2' used by device 'ide0-hd0' doesn't support the option 'unknown_opt' Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=1234 -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=1234: could not open disk image TEST_DIR/t.qcow2: Block format 'qcow2' used by device 'ide0-hd0' doesn't support the option 'unknown_opt' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=1234: Block format 'qcow2' used by device 'ide0-hd0' doesn't support the option 'unknown_opt' Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=foo -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=foo: could not open disk image TEST_DIR/t.qcow2: Block format 'qcow2' used by device 'ide0-hd0' doesn't support the option 'unknown_opt' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=foo: Block format 'qcow2' used by device 'ide0-hd0' doesn't support the option 'unknown_opt' === Unknown protocol option === Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt= -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt=: could not open disk image TEST_DIR/t.qcow2: Block protocol 'file' doesn't support the option 'unknown_opt' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt=: Block protocol 'file' doesn't support the option 'unknown_opt' Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt=on -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt=on: could not open disk image TEST_DIR/t.qcow2: Block protocol 'file' doesn't support the option 'unknown_opt' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt=on: Block protocol 'file' doesn't support the option 'unknown_opt' Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt=1234 -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt=1234: could not open disk image TEST_DIR/t.qcow2: Block protocol 'file' doesn't support the option 'unknown_opt' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt=1234: Block protocol 'file' doesn't support the option 'unknown_opt' Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt=foo -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt=foo: could not open disk image TEST_DIR/t.qcow2: Block protocol 'file' doesn't support the option 'unknown_opt' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,file.unknown_opt=foo: Block protocol 'file' doesn't support the option 'unknown_opt' === Invalid format === Testing: -drive file=TEST_DIR/t.qcow2,format=foo -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=foo: 'foo' invalid format +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=foo: Unknown driver 'foo' Testing: -drive file=TEST_DIR/t.qcow2,driver=foo -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=foo: could not open disk image TEST_DIR/t.qcow2: Unknown driver 'foo' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=foo: Unknown driver 'foo' Testing: -drive file=TEST_DIR/t.qcow2,driver=raw,format=qcow2 -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=raw,format=qcow2: could not open disk image TEST_DIR/t.qcow2: Driver specified twice +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=raw,format=qcow2: Cannot specify both 'driver' and 'format' + +Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specify both 'driver' and 'format' === Overriding backing file === @@ -55,13 +58,13 @@ ide0-hd0: TEST_DIR/t.qcow2 (qcow2) (qemu) q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K Testing: -drive file=TEST_DIR/t.qcow2,driver=raw,backing.file.filename=TEST_DIR/t.qcow2.orig -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=raw,backing.file.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=raw,backing.file.filename=TEST_DIR/t.qcow2.orig: Driver doesn't support backing files Testing: -drive file=TEST_DIR/t.qcow2,file.backing.driver=file,file.backing.filename=TEST_DIR/t.qcow2.orig -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.backing.driver=file,file.backing.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.backing.driver=file,file.backing.filename=TEST_DIR/t.qcow2.orig: Driver doesn't support backing files Testing: -drive file=TEST_DIR/t.qcow2,file.backing.driver=qcow2,file.backing.file.filename=TEST_DIR/t.qcow2.orig -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.backing.driver=qcow2,file.backing.file.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.backing.driver=qcow2,file.backing.file.filename=TEST_DIR/t.qcow2.orig: Driver doesn't support backing files === Enable and disable lazy refcounting on the command line, plus some invalid values === @@ -75,20 +78,20 @@ QEMU X.Y.Z monitor - type 'help' for more information (qemu) q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts= -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=: could not open disk image TEST_DIR/t.qcow2: Parameter 'lazy-refcounts' expects 'on' or 'off' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=: Parameter 'lazy-refcounts' expects 'on' or 'off' Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=42 -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=42: could not open disk image TEST_DIR/t.qcow2: Parameter 'lazy-refcounts' expects 'on' or 'off' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=42: Parameter 'lazy-refcounts' expects 'on' or 'off' Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=foo -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=foo: could not open disk image TEST_DIR/t.qcow2: Parameter 'lazy-refcounts' expects 'on' or 'off' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=foo: Parameter 'lazy-refcounts' expects 'on' or 'off' === With version 2 images enabling lazy refcounts must fail === Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=on -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=on: could not open disk image TEST_DIR/t.qcow2: Lazy refcounts require a qcow2 image with at least qemu 1.1 compatibility level +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=on: Lazy refcounts require a qcow2 image with at least qemu 1.1 compatibility level Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,lazy-refcounts=off QEMU X.Y.Z monitor - type 'help' for more information @@ -248,31 +251,31 @@ QEMU X.Y.Z monitor - type 'help' for more information (qemu) q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K Testing: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2 -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2: could not open disk image TEST_DIR/t.qcow2: Block format 'qcow2' used by device '' doesn't support the option 'filename' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2: Block format 'qcow2' used by device '' doesn't support the option 'filename' === Leaving out required options === Testing: -drive driver=file -QEMU_PROG: -drive driver=file: could not open disk image ide0-hd0: The 'file' block driver requires a file name +QEMU_PROG: -drive driver=file: The 'file' block driver requires a file name Testing: -drive driver=nbd -QEMU_PROG: -drive driver=nbd: could not open disk image ide0-hd0: one of path and host must be specified. +QEMU_PROG: -drive driver=nbd: one of path and host must be specified. Testing: -drive driver=raw -QEMU_PROG: -drive driver=raw: could not open disk image ide0-hd0: Can't use 'raw' as a block driver for the protocol level +QEMU_PROG: -drive driver=raw: Can't use 'raw' as a block driver for the protocol level Testing: -drive file.driver=file -QEMU_PROG: -drive file.driver=file: could not open disk image ide0-hd0: The 'file' block driver requires a file name +QEMU_PROG: -drive file.driver=file: The 'file' block driver requires a file name Testing: -drive file.driver=nbd -QEMU_PROG: -drive file.driver=nbd: could not open disk image ide0-hd0: one of path and host must be specified. +QEMU_PROG: -drive file.driver=nbd: one of path and host must be specified. Testing: -drive file.driver=raw -QEMU_PROG: -drive file.driver=raw: could not open disk image ide0-hd0: Can't use 'raw' as a block driver for the protocol level +QEMU_PROG: -drive file.driver=raw: Can't use 'raw' as a block driver for the protocol level Testing: -drive foo=bar -QEMU_PROG: -drive foo=bar: could not open disk image ide0-hd0: Must specify either driver or file +QEMU_PROG: -drive foo=bar: Must specify either driver or file === Specifying both an option and its legacy alias === @@ -323,13 +326,13 @@ QEMU_PROG: -drive file=TEST_DIR/t.qcow2,readonly=on,read-only=off: 'read-only' a === Parsing protocol from file name === Testing: -hda foo:bar -QEMU_PROG: -hda foo:bar: could not open disk image foo:bar: Unknown protocol +QEMU_PROG: -hda foo:bar: Unknown protocol 'foo' Testing: -drive file=foo:bar -QEMU_PROG: -drive file=foo:bar: could not open disk image foo:bar: Unknown protocol +QEMU_PROG: -drive file=foo:bar: Unknown protocol 'foo' Testing: -drive file.filename=foo:bar -QEMU_PROG: -drive file.filename=foo:bar: could not open disk image ide0-hd0: Could not open 'foo:bar': No such file or directory +QEMU_PROG: -drive file.filename=foo:bar: Could not open 'foo:bar': No such file or directory Testing: -hda file:TEST_DIR/t.qcow2 QEMU X.Y.Z monitor - type 'help' for more information @@ -340,7 +343,7 @@ QEMU X.Y.Z monitor - type 'help' for more information (qemu) q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K Testing: -drive file.filename=file:TEST_DIR/t.qcow2 -QEMU_PROG: -drive file.filename=file:TEST_DIR/t.qcow2: could not open disk image ide0-hd0: Could not open 'file:TEST_DIR/t.qcow2': No such file or directory +QEMU_PROG: -drive file.filename=file:TEST_DIR/t.qcow2: Could not open 'file:TEST_DIR/t.qcow2': No such file or directory === Snapshot mode === diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out index 91f4ea1a8b..0ba2e43b40 100644 --- a/tests/qemu-iotests/087.out +++ b/tests/qemu-iotests/087.out @@ -21,9 +21,9 @@ QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "Device with id 'disk' already exists"}} {"error": {"class": "GenericError", "desc": "Device name 'test-node' conflicts with an existing node name"}} -{"error": {"class": "GenericError", "desc": "could not open disk image disk2: node-name=disk is conflicting with a device id"}} -{"error": {"class": "GenericError", "desc": "could not open disk image disk2: Duplicate node name"}} -{"error": {"class": "GenericError", "desc": "could not open disk image disk3: node-name=disk3 is conflicting with a device id"}} +{"error": {"class": "GenericError", "desc": "node-name=disk is conflicting with a device id"}} +{"error": {"class": "GenericError", "desc": "Duplicate node name"}} +{"error": {"class": "GenericError", "desc": "node-name=disk3 is conflicting with a device id"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} @@ -57,7 +57,7 @@ QMP_VERSION Testing: QMP_VERSION {"return": {}} -{"error": {"class": "GenericError", "desc": "could not open disk image disk: Guest must be stopped for opening of encrypted image"}} +{"error": {"class": "GenericError", "desc": "Guest must be stopped for opening of encrypted image"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} diff --git a/tests/qemu-iotests/093 b/tests/qemu-iotests/093 new file mode 100755 index 0000000000..b9096a55d4 --- /dev/null +++ b/tests/qemu-iotests/093 @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# +# Tests for IO throttling +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import iotests + +class ThrottleTestCase(iotests.QMPTestCase): + test_img = "null-aio://" + + def blockstats(self, device): + result = self.vm.qmp("query-blockstats") + for r in result['return']: + if r['device'] == device: + stat = r['stats'] + return stat['rd_bytes'], stat['rd_operations'], stat['wr_bytes'], stat['wr_operations'] + raise Exception("Device not found for blockstats: %s" % device) + + def setUp(self): + self.vm = iotests.VM().add_drive(self.test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + + def do_test_throttle(self, seconds, params): + def check_limit(limit, num): + # IO throttling algorithm is discrete, allow 10% error so the test + # is more robust + return limit == 0 or \ + (num < seconds * limit * 1.1 + and num > seconds * limit * 0.9) + + nsec_per_sec = 1000000000 + + params['device'] = 'drive0' + + result = self.vm.qmp("block_set_io_throttle", conv_keys=False, **params) + self.assert_qmp(result, 'return', {}) + + # Set vm clock to a known value + ns = seconds * nsec_per_sec + self.vm.qtest("clock_step %d" % ns) + + # Submit enough requests. They will drain bps_max and iops_max, but the + # rest requests won't get executed until we advance the virtual clock + # with qtest interface + rq_size = 512 + rd_nr = max(params['bps'] / rq_size / 2, + params['bps_rd'] / rq_size, + params['iops'] / 2, + params['iops_rd']) + rd_nr *= seconds * 2 + wr_nr = max(params['bps'] / rq_size / 2, + params['bps_wr'] / rq_size, + params['iops'] / 2, + params['iops_wr']) + wr_nr *= seconds * 2 + for i in range(rd_nr): + self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % (i * rq_size, rq_size)) + for i in range(wr_nr): + self.vm.hmp_qemu_io("drive0", "aio_write %d %d" % (i * rq_size, rq_size)) + + start_rd_bytes, start_rd_iops, start_wr_bytes, start_wr_iops = self.blockstats('drive0') + + self.vm.qtest("clock_step %d" % ns) + end_rd_bytes, end_rd_iops, end_wr_bytes, end_wr_iops = self.blockstats('drive0') + + rd_bytes = end_rd_bytes - start_rd_bytes + rd_iops = end_rd_iops - start_rd_iops + wr_bytes = end_wr_bytes - start_wr_bytes + wr_iops = end_wr_iops - start_wr_iops + + self.assertTrue(check_limit(params['bps'], rd_bytes + wr_bytes)) + self.assertTrue(check_limit(params['bps_rd'], rd_bytes)) + self.assertTrue(check_limit(params['bps_wr'], wr_bytes)) + self.assertTrue(check_limit(params['iops'], rd_iops + wr_iops)) + self.assertTrue(check_limit(params['iops_rd'], rd_iops)) + self.assertTrue(check_limit(params['iops_wr'], wr_iops)) + + def test_all(self): + params = {"bps": 4096, + "bps_rd": 4096, + "bps_wr": 4096, + "iops": 10, + "iops_rd": 10, + "iops_wr": 10, + } + # Pick each out of all possible params and test + for tk in params: + limits = dict([(k, 0) for k in params]) + limits[tk] = params[tk] + self.do_test_throttle(5, limits) + +class ThrottleTestCoroutine(ThrottleTestCase): + test_img = "null-co://" + +if __name__ == '__main__': + iotests.main(supported_fmts=["raw"]) diff --git a/tests/qemu-iotests/093.out b/tests/qemu-iotests/093.out new file mode 100644 index 0000000000..fbc63e62f8 --- /dev/null +++ b/tests/qemu-iotests/093.out @@ -0,0 +1,5 @@ +.. +---------------------------------------------------------------------- +Ran 2 tests + +OK diff --git a/tests/qemu-iotests/094 b/tests/qemu-iotests/094 new file mode 100755 index 0000000000..27a2be2569 --- /dev/null +++ b/tests/qemu-iotests/094 @@ -0,0 +1,81 @@ +#!/bin/bash +# +# Test case for drive-mirror to NBD (especially bdrv_swap() on NBD BDS) +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +trap "exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt generic +_supported_proto nbd +_supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" + +_make_test_img 64M +$QEMU_IMG create -f $IMGFMT "$TEST_DIR/source.$IMGFMT" 64M | _filter_img_create + +_launch_qemu -drive if=none,id=src,file="$TEST_DIR/source.$IMGFMT",format=raw \ + -nodefaults + +_send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'qmp_capabilities'}" \ + 'return' + +# 'format': 'nbd' is not actually "correct", but this is probably the only way +# to test bdrv_swap() on an NBD BDS +_send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'drive-mirror', + 'arguments': {'device': 'src', + 'target': '$TEST_IMG', + 'format': 'nbd', + 'sync':'full', + 'mode':'existing'}}" \ + 'BLOCK_JOB_READY' + +_send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'block-job-complete', + 'arguments': {'device': 'src'}}" \ + 'BLOCK_JOB_COMPLETE' + +_send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'quit'}" \ + 'return' + +wait=1 _cleanup_qemu + +_cleanup_test_img +rm -f "$TEST_DIR/source.$IMGFMT" + +# success, all done +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/094.out b/tests/qemu-iotests/094.out new file mode 100644 index 0000000000..b66dc0787d --- /dev/null +++ b/tests/qemu-iotests/094.out @@ -0,0 +1,11 @@ +QA output created by 094 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/source.IMGFMT', fmt=IMGFMT size=67108864 +{"return": {}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 67108864, "offset": 67108864, "speed": 0, "type": "mirror"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 67108864, "offset": 67108864, "speed": 0, "type": "mirror"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +*** done diff --git a/tests/qemu-iotests/016 b/tests/qemu-iotests/123 index 52397aa80e..ad608035d1 100755 --- a/tests/qemu-iotests/016 +++ b/tests/qemu-iotests/123 @@ -1,8 +1,8 @@ #!/bin/bash # -# Test I/O after EOF for growable images. +# Test case for qemu-img convert to NBD # -# Copyright (C) 2009 Red Hat, Inc. +# Copyright (C) 2015 Red Hat, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,18 +19,19 @@ # # creator -owner=hch@lst.de +owner=mreitz@redhat.com -seq=`basename $0` +seq="$(basename $0)" echo "QA output created by $seq" -here=`pwd` +here="$PWD" tmp=/tmp/$$ status=1 # failure is the default! _cleanup() { - _cleanup_test_img + _cleanup_test_img + rm -f "$SRC_IMG" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -39,35 +40,23 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 . ./common.filter _supported_fmt raw -_supported_proto file sheepdog nfs +_supported_proto nbd _supported_os Linux +SRC_IMG="$TEST_DIR/source.$IMGFMT" -# No -f, use probing for the protocol driver -QEMU_IO_PROTO="$QEMU_IO_PROG -g --cache $CACHEMODE" +_make_test_img 1M +$QEMU_IMG create -f $IMGFMT "$SRC_IMG" 1M | _filter_img_create -size=128M -_make_test_img $size +$QEMU_IO -c 'write -P 42 0 1M' "$SRC_IMG" | _filter_qemu_io -echo -echo "== reading at EOF ==" -$QEMU_IO_PROTO -c "read -P 0 $size 512" "$TEST_IMG" | _filter_qemu_io - -echo -echo "== reading far past EOF ==" -$QEMU_IO_PROTO -c "read -P 0 256M 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG convert -n -f $IMGFMT -O raw "$SRC_IMG" "$TEST_IMG" -echo -echo "== writing at EOF ==" -$QEMU_IO_PROTO -c "write -P 66 $size 512" "$TEST_IMG" | _filter_qemu_io -$QEMU_IO -c "read -P 66 $size 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'read -P 42 0 1M' "$TEST_IMG" | _filter_qemu_io -echo -echo "== writing far past EOF ==" -$QEMU_IO_PROTO -c "write -P 66 256M 512" "$TEST_IMG" | _filter_qemu_io -$QEMU_IO -c "read -P 66 256M 512" "$TEST_IMG" | _filter_qemu_io # success, all done -echo "*** done" +echo +echo '*** done' rm -f $seq.full status=0 diff --git a/tests/qemu-iotests/123.out b/tests/qemu-iotests/123.out new file mode 100644 index 0000000000..0b818d34c4 --- /dev/null +++ b/tests/qemu-iotests/123.out @@ -0,0 +1,9 @@ +QA output created by 123 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +Formatting 'TEST_DIR/source.IMGFMT', fmt=IMGFMT size=1048576 +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +*** done diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu index 8e618b5149..4e1996c3ec 100644 --- a/tests/qemu-iotests/common.qemu +++ b/tests/qemu-iotests/common.qemu @@ -187,13 +187,23 @@ function _launch_qemu() # Silenty kills the QEMU process +# +# If $wait is set to anything other than the empty string, the process will not +# be killed but only waited for, and any output will be forwarded to stdout. If +# $wait is empty, the process will be killed and all output will be suppressed. function _cleanup_qemu() { # QEMU_PID[], QEMU_IN[], QEMU_OUT[] all use same indices for i in "${!QEMU_OUT[@]}" do - kill -KILL ${QEMU_PID[$i]} 2>/dev/null + if [ -z "${wait}" ]; then + kill -KILL ${QEMU_PID[$i]} 2>/dev/null + fi wait ${QEMU_PID[$i]} 2>/dev/null # silent kill + if [ -n "${wait}" ]; then + cat <&${QEMU_OUT[$i]} | _filter_testdir | _filter_qemu \ + | _filter_qemu_io | _filter_qmp + fi rm -f "${QEMU_FIFO_IN}_${i}" "${QEMU_FIFO_OUT}_${i}" eval "exec ${QEMU_IN[$i]}<&-" # close file descriptors eval "exec ${QEMU_OUT[$i]}<&-" diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 4b2b93bc19..0d3b95c258 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -22,7 +22,7 @@ 013 rw auto 014 rw auto 015 rw snapshot auto -016 rw auto quick +# 016 was removed, do not reuse 017 rw backing auto quick 018 rw backing auto quick 019 rw backing auto quick @@ -99,6 +99,8 @@ 090 rw auto quick 091 rw auto 092 rw auto quick +093 auto +094 rw auto quick 095 rw auto quick 097 rw auto backing 098 rw auto backing quick @@ -117,3 +119,4 @@ 113 rw auto quick 114 rw auto quick 116 rw auto quick +123 rw auto quick diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 241b5ee9dd..14028540b3 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -21,8 +21,11 @@ import re import subprocess import string import unittest -import sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts', 'qmp')) +import sys +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts')) +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts', 'qmp')) import qmp +import qtest import struct __all__ = ['imgfmt', 'imgproto', 'test_dir' 'qemu_img', 'qemu_io', @@ -81,10 +84,12 @@ class VM(object): def __init__(self): self._monitor_path = os.path.join(test_dir, 'qemu-mon.%d' % os.getpid()) self._qemu_log_path = os.path.join(test_dir, 'qemu-log.%d' % os.getpid()) + self._qtest_path = os.path.join(test_dir, 'qemu-qtest.%d' % os.getpid()) self._args = qemu_args + ['-chardev', 'socket,id=mon,path=' + self._monitor_path, '-mon', 'chardev=mon,mode=control', - '-qtest', 'stdio', '-machine', 'accel=qtest', + '-qtest', 'unix:path=' + self._qtest_path, + '-machine', 'accel=qtest', '-display', 'none', '-vga', 'none'] self._num_drives = 0 @@ -160,9 +165,11 @@ class VM(object): qemulog = open(self._qemu_log_path, 'wb') try: self._qmp = qmp.QEMUMonitorProtocol(self._monitor_path, server=True) + self._qtest = qtest.QEMUQtestProtocol(self._qtest_path, server=True) self._popen = subprocess.Popen(self._args, stdin=devnull, stdout=qemulog, stderr=subprocess.STDOUT) self._qmp.accept() + self._qtest.accept() except: os.remove(self._monitor_path) raise @@ -173,18 +180,26 @@ class VM(object): self._qmp.cmd('quit') self._popen.wait() os.remove(self._monitor_path) + os.remove(self._qtest_path) os.remove(self._qemu_log_path) self._popen = None underscore_to_dash = string.maketrans('_', '-') - def qmp(self, cmd, **args): + def qmp(self, cmd, conv_keys=True, **args): '''Invoke a QMP command and return the result dict''' qmp_args = dict() for k in args.keys(): - qmp_args[k.translate(self.underscore_to_dash)] = args[k] + if conv_keys: + qmp_args[k.translate(self.underscore_to_dash)] = args[k] + else: + qmp_args[k] = args[k] return self._qmp.cmd(cmd, args=qmp_args) + def qtest(self, cmd): + '''Send a qtest command to guest''' + return self._qtest.cmd(cmd) + def get_qmp_event(self, wait=False): '''Poll for one queued QMP events and return it''' return self._qmp.pull_event(wait=wait) diff --git a/tests/test-rcu-list.c b/tests/test-rcu-list.c new file mode 100644 index 0000000000..46b5e263e5 --- /dev/null +++ b/tests/test-rcu-list.c @@ -0,0 +1,306 @@ +/* + * rcuq_test.c + * + * usage: rcuq_test <readers> <duration> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (c) 2013 Mike D. Day, IBM Corporation. + */ + +#include <glib.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "qemu/atomic.h" +#include "qemu/rcu.h" +#include "qemu/compiler.h" +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "qemu/rcu_queue.h" + +/* + * Test variables. + */ + +long long n_reads = 0LL; +long long n_updates = 0LL; +long long n_reclaims = 0LL; +long long n_nodes_removed = 0LL; +long long n_nodes = 0LL; +int g_test_in_charge = 0; + +int nthreadsrunning; + +char argsbuf[64]; + +#define GOFLAG_INIT 0 +#define GOFLAG_RUN 1 +#define GOFLAG_STOP 2 + +static volatile int goflag = GOFLAG_INIT; + +#define RCU_READ_RUN 1000 +#define RCU_UPDATE_RUN 10 +#define NR_THREADS 100 +#define RCU_Q_LEN 100 + +static QemuThread threads[NR_THREADS]; +static struct rcu_reader_data *data[NR_THREADS]; +static int n_threads; + +static int select_random_el(int max) +{ + return (rand() % max); +} + + +static void create_thread(void *(*func)(void *)) +{ + if (n_threads >= NR_THREADS) { + fprintf(stderr, "Thread limit of %d exceeded!\n", NR_THREADS); + exit(-1); + } + qemu_thread_create(&threads[n_threads], "test", func, &data[n_threads], + QEMU_THREAD_JOINABLE); + n_threads++; +} + +static void wait_all_threads(void) +{ + int i; + + for (i = 0; i < n_threads; i++) { + qemu_thread_join(&threads[i]); + } + n_threads = 0; +} + + +struct list_element { + QLIST_ENTRY(list_element) entry; + struct rcu_head rcu; + long long val; +}; + +static void reclaim_list_el(struct rcu_head *prcu) +{ + struct list_element *el = container_of(prcu, struct list_element, rcu); + g_free(el); + atomic_add(&n_reclaims, 1); +} + +static QLIST_HEAD(q_list_head, list_element) Q_list_head; + +static void *rcu_q_reader(void *arg) +{ + long long j, n_reads_local = 0; + struct list_element *el; + + *(struct rcu_reader_data **)arg = &rcu_reader; + atomic_inc(&nthreadsrunning); + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + + while (goflag == GOFLAG_RUN) { + rcu_read_lock(); + QLIST_FOREACH_RCU(el, &Q_list_head, entry) { + j = atomic_read(&el->val); + (void)j; + n_reads_local++; + if (goflag == GOFLAG_STOP) { + break; + } + } + rcu_read_unlock(); + + g_usleep(100); + } + atomic_add(&n_reads, n_reads_local); + return NULL; +} + + +static void *rcu_q_updater(void *arg) +{ + int j, target_el; + long long n_updates_local = 0; + long long n_removed_local = 0; + struct list_element *el, *prev_el; + + *(struct rcu_reader_data **)arg = &rcu_reader; + atomic_inc(&nthreadsrunning); + while (goflag == GOFLAG_INIT) { + g_usleep(1000); + } + + while (goflag == GOFLAG_RUN) { + target_el = select_random_el(RCU_Q_LEN); + j = 0; + /* FOREACH_RCU could work here but let's use both macros */ + QLIST_FOREACH_SAFE_RCU(prev_el, &Q_list_head, entry, el) { + j++; + if (target_el == j) { + QLIST_REMOVE_RCU(prev_el, entry); + /* may be more than one updater in the future */ + call_rcu1(&prev_el->rcu, reclaim_list_el); + n_removed_local++; + break; + } + } + if (goflag == GOFLAG_STOP) { + break; + } + target_el = select_random_el(RCU_Q_LEN); + j = 0; + QLIST_FOREACH_RCU(el, &Q_list_head, entry) { + j++; + if (target_el == j) { + prev_el = g_new(struct list_element, 1); + atomic_add(&n_nodes, 1); + prev_el->val = atomic_read(&n_nodes); + QLIST_INSERT_BEFORE_RCU(el, prev_el, entry); + break; + } + } + + n_updates_local += 2; + synchronize_rcu(); + } + synchronize_rcu(); + atomic_add(&n_updates, n_updates_local); + atomic_add(&n_nodes_removed, n_removed_local); + return NULL; +} + +static void rcu_qtest_init(void) +{ + struct list_element *new_el; + int i; + nthreadsrunning = 0; + srand(time(0)); + for (i = 0; i < RCU_Q_LEN; i++) { + new_el = g_new(struct list_element, 1); + new_el->val = i; + QLIST_INSERT_HEAD_RCU(&Q_list_head, new_el, entry); + } + atomic_add(&n_nodes, RCU_Q_LEN); +} + +static void rcu_qtest_run(int duration, int nreaders) +{ + int nthreads = nreaders + 1; + while (atomic_read(&nthreadsrunning) < nthreads) { + g_usleep(1000); + } + + goflag = GOFLAG_RUN; + sleep(duration); + goflag = GOFLAG_STOP; + wait_all_threads(); +} + + +static void rcu_qtest(const char *test, int duration, int nreaders) +{ + int i; + long long n_removed_local = 0; + + struct list_element *el, *prev_el; + + rcu_qtest_init(); + for (i = 0; i < nreaders; i++) { + create_thread(rcu_q_reader); + } + create_thread(rcu_q_updater); + rcu_qtest_run(duration, nreaders); + + QLIST_FOREACH_SAFE_RCU(prev_el, &Q_list_head, entry, el) { + QLIST_REMOVE_RCU(prev_el, entry); + call_rcu1(&prev_el->rcu, reclaim_list_el); + n_removed_local++; + } + atomic_add(&n_nodes_removed, n_removed_local); + synchronize_rcu(); + while (n_nodes_removed > n_reclaims) { + g_usleep(100); + synchronize_rcu(); + } + if (g_test_in_charge) { + g_assert_cmpint(n_nodes_removed, ==, n_reclaims); + } else { + printf("%s: %d readers; 1 updater; nodes read: " \ + "%lld, nodes removed: %lld; nodes reclaimed: %lld\n", + test, nthreadsrunning - 1, n_reads, n_nodes_removed, n_reclaims); + exit(0); + } +} + +static void usage(int argc, char *argv[]) +{ + fprintf(stderr, "Usage: %s duration nreaders\n", argv[0]); + exit(-1); +} + +static int gtest_seconds; + +static void gtest_rcuq_one(void) +{ + rcu_qtest("rcuqtest", gtest_seconds / 4, 1); +} + +static void gtest_rcuq_few(void) +{ + rcu_qtest("rcuqtest", gtest_seconds / 4, 5); +} + +static void gtest_rcuq_many(void) +{ + rcu_qtest("rcuqtest", gtest_seconds / 2, 20); +} + + +int main(int argc, char *argv[]) +{ + int duration = 0, readers = 0; + + if (argc >= 2) { + if (argv[1][0] == '-') { + g_test_init(&argc, &argv, NULL); + if (g_test_quick()) { + gtest_seconds = 4; + } else { + gtest_seconds = 20; + } + g_test_add_func("/rcu/qlist/single-threaded", gtest_rcuq_one); + g_test_add_func("/rcu/qlist/short-few", gtest_rcuq_few); + g_test_add_func("/rcu/qlist/long-many", gtest_rcuq_many); + g_test_in_charge = 1; + return g_test_run(); + } + duration = strtoul(argv[1], NULL, 0); + } + if (argc >= 3) { + readers = strtoul(argv[2], NULL, 0); + } + if (duration && readers) { + rcu_qtest(argv[0], duration, readers); + return 0; + } + + usage(argc, argv); + return -1; +} @@ -1111,6 +1111,12 @@ static int vnc_update_client(VncState *vs, int has_dirty, bool sync) n += vnc_job_add_rect(job, x * VNC_DIRTY_PIXELS_PER_BIT, y, (x2 - x) * VNC_DIRTY_PIXELS_PER_BIT, h); } + if (!x && x2 == width / VNC_DIRTY_PIXELS_PER_BIT) { + y += h; + if (y == height) { + break; + } + } } vnc_job_push(job); @@ -3242,6 +3248,7 @@ char *vnc_display_local_addr(const char *id) { VncDisplay *vs = vnc_display_find(id); + assert(vs); return vnc_socket_local_addr("%s:%s", vs->lsock); } @@ -3272,6 +3279,15 @@ static QemuOptsList qemu_vnc_opts = { .name = "connections", .type = QEMU_OPT_NUMBER, },{ + .name = "to", + .type = QEMU_OPT_NUMBER, + },{ + .name = "ipv4", + .type = QEMU_OPT_BOOL, + },{ + .name = "ipv6", + .type = QEMU_OPT_BOOL, + },{ .name = "password", .type = QEMU_OPT_BOOL, },{ @@ -3307,19 +3323,24 @@ void vnc_display_open(const char *id, Error **errp) { VncDisplay *vs = vnc_display_find(id); QemuOpts *opts = qemu_opts_find(&qemu_vnc_opts, id); - const char *display, *share, *device_id; + const char *share, *device_id; QemuConsole *con; - int password = 0; - int reverse = 0; + bool password = false; + bool reverse = false; + const char *vnc; + const char *has_to; + char *display, *to = NULL; + bool has_ipv4 = false; + bool has_ipv6 = false; #ifdef CONFIG_VNC_WS const char *websocket; #endif #ifdef CONFIG_VNC_TLS - int tls = 0, x509 = 0; + bool tls = false, x509 = false; const char *path; #endif #ifdef CONFIG_VNC_SASL - int sasl = 0; + bool sasl = false; int saslErr; #endif #if defined(CONFIG_VNC_TLS) || defined(CONFIG_VNC_SASL) @@ -3336,10 +3357,21 @@ void vnc_display_open(const char *id, Error **errp) if (!opts) { return; } - display = qemu_opt_get(opts, "vnc"); - if (!display || strcmp(display, "none") == 0) { + vnc = qemu_opt_get(opts, "vnc"); + if (!vnc || strcmp(vnc, "none") == 0) { return; } + + has_to = qemu_opt_get(opts, "to"); + if (has_to) { + to = g_strdup_printf(",to=%s", has_to); + } + has_ipv4 = qemu_opt_get_bool(opts, "ipv4", false); + has_ipv6 = qemu_opt_get_bool(opts, "ipv6", false); + display = g_strdup_printf("%s%s%s%s", vnc, + has_to ? to : "", + has_ipv4 ? ",ipv4" : "", + has_ipv6 ? ",ipv6" : ""); vs->display = g_strdup(display); password = qemu_opt_get_bool(opts, "password", false); @@ -3360,7 +3392,7 @@ void vnc_display_open(const char *id, Error **errp) tls = qemu_opt_get_bool(opts, "tls", false); path = qemu_opt_get(opts, "x509"); if (path) { - x509 = 1; + x509 = true; vs->tls.x509verify = qemu_opt_get_bool(opts, "x509verify", false); if (vnc_tls_set_x509_creds_dir(vs, path) < 0) { error_setg(errp, "Failed to find x509 certificates/keys in %s", @@ -3619,6 +3651,8 @@ void vnc_display_open(const char *id, Error **errp) } #endif /* CONFIG_VNC_WS */ } + g_free(to); + g_free(display); g_free(vs->display); vs->display = dpy; qemu_set_fd_handler2(vs->lsock, NULL, @@ -3633,6 +3667,8 @@ void vnc_display_open(const char *id, Error **errp) return; fail: + g_free(to); + g_free(display); g_free(vs->display); vs->display = NULL; #ifdef CONFIG_VNC_WS @@ -3656,6 +3692,19 @@ QemuOpts *vnc_parse_func(const char *str) return qemu_opts_parse(qemu_find_opts("vnc"), str, 1); } +void vnc_auto_assign_id(QemuOptsList *olist, QemuOpts *opts) +{ + int i = 2; + char *id; + + id = g_strdup("default"); + while (qemu_opts_find(olist, id)) { + g_free(id); + id = g_strdup_printf("vnc%d", i++); + } + qemu_opts_set_id(opts, id); +} + int vnc_init_func(QemuOpts *opts, void *opaque) { Error *local_err = NULL; @@ -3664,13 +3713,8 @@ int vnc_init_func(QemuOpts *opts, void *opaque) if (!id) { /* auto-assign id if not present */ - int i = 2; - id = g_strdup("default"); - while (qemu_opts_find(olist, id)) { - g_free(id); - id = g_strdup_printf("vnc%d", i++); - } - qemu_opts_set_id(opts, id); + vnc_auto_assign_id(olist, opts); + id = (char *)qemu_opts_id(opts); } vnc_display_init(id); diff --git a/util/envlist.c b/util/envlist.c index ebc06cf0f3..099a544a41 100644 --- a/util/envlist.c +++ b/util/envlist.c @@ -94,30 +94,30 @@ envlist_parse(envlist_t *envlist, const char *env, { char *tmpenv, *envvar; char *envsave = NULL; - - assert(callback != NULL); + int ret = 0; + assert(callback != NULL); if ((envlist == NULL) || (env == NULL)) return (EINVAL); - /* - * We need to make temporary copy of the env string - * as strtok_r(3) modifies it while it tokenizes. - */ if ((tmpenv = strdup(env)) == NULL) return (errno); - - envvar = strtok_r(tmpenv, ",", &envsave); - while (envvar != NULL) { - if ((*callback)(envlist, envvar) != 0) { - free(tmpenv); - return (errno); + envsave = tmpenv; + + do { + envvar = strchr(tmpenv, ','); + if (envvar != NULL) { + *envvar = '\0'; + } + if ((*callback)(envlist, tmpenv) != 0) { + ret = errno; + break; } - envvar = strtok_r(NULL, ",", &envsave); - } + tmpenv = envvar + 1; + } while (envvar != NULL); - free(tmpenv); - return (0); + free(envsave); + return ret; } /* diff --git a/util/rcu.c b/util/rcu.c index c9c3e6e4ab..bd73b8eb47 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -35,6 +35,7 @@ #include "qemu/rcu.h" #include "qemu/atomic.h" #include "qemu/thread.h" +#include "qemu/main-loop.h" /* * Global grace period counter. Bit 0 is always one in rcu_gp_ctr. @@ -223,32 +224,38 @@ static void *call_rcu_thread(void *opaque) * Fetch rcu_call_count now, we only must process elements that were * added before synchronize_rcu() starts. */ - while (n < RCU_CALL_MIN_SIZE && ++tries <= 5) { - g_usleep(100000); - qemu_event_reset(&rcu_call_ready_event); - n = atomic_read(&rcu_call_count); - if (n < RCU_CALL_MIN_SIZE) { - qemu_event_wait(&rcu_call_ready_event); + while (n == 0 || (n < RCU_CALL_MIN_SIZE && ++tries <= 5)) { + g_usleep(10000); + if (n == 0) { + qemu_event_reset(&rcu_call_ready_event); n = atomic_read(&rcu_call_count); + if (n == 0) { + qemu_event_wait(&rcu_call_ready_event); + } } + n = atomic_read(&rcu_call_count); } atomic_sub(&rcu_call_count, n); synchronize_rcu(); + qemu_mutex_lock_iothread(); while (n > 0) { node = try_dequeue(); while (!node) { + qemu_mutex_unlock_iothread(); qemu_event_reset(&rcu_call_ready_event); node = try_dequeue(); if (!node) { qemu_event_wait(&rcu_call_ready_event); node = try_dequeue(); } + qemu_mutex_lock_iothread(); } n--; node->func(node); } + qemu_mutex_unlock_iothread(); } abort(); } |